## Set Global Seed 

In [1]:
import os
import random
import numpy as np
import torch
import transformers

def set_all_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed = 260615
set_all_seeds(seed)

print("The global seed " + str(seed))

The global seed 260615


## Hyperparameters

In [2]:
# LANGUAGE

_LANGUAGE_         = 'en'

In [3]:
# MODEL CLASSIFICATION

_PRETRAINED_LM_    = 'vinai/bertweet-base'
_PREPROCESS_TEXT_  = True
_TWEET_BATCH_SIZE_ = 5
_ADAPTER_CONFIG_   = transformers.UniPELTConfig()
_MAX_SEQ_LEN_      = 128

In [4]:
# TRAIN

_OUTPUT_DIR_       = 'checkPoints'
_LOGGING_STEPS_    = 50
_NUM_AUTHORS_      = 256
_K_FOLD_CV_        = 5
_NO_GPUS_          = 1
_BATCH_SIZE_       = int(32 / _NO_GPUS_)
_EPOCHS_           = 10
_LEARNING_RATE_    = 1e-4

# PREDICTIONS

_DATASET_          = 'PAN17'
_PRED_DIR_         = 'UniPELT'

## Other parameters

In [5]:
# LABEL DICTONARIES -----------------------------------------------------------------------

gender_dict    = {'female': 0, 'male':   1}
varietyEN_dict = {'australia': 0, 'canada': 1, 'great britain': 2, 'ireland': 3, 'new zealand': 4, 'united states': 5}
varietyES_dict = {'argentina': 0, 'chile': 1, 'colombia': 2, 'mexico': 3, 'peru': 4, 'spain': 5, 'venezuela': 6}  

genderEN_hip  = {0: 'I’m a female', 1: 'I’m a male'}
genderES_hip  = {0: 'Mi nombre es María', 1: 'Mi nombre es José'}

In [6]:
# SET LANGUAGE DICTIONARIES

if _LANGUAGE_ == 'en':
    gender_hip   = genderEN_hip
    variety_dict = varietyEN_dict

elif _LANGUAGE_ == 'es':
    gender_hip   = genderES_hip
    variety_dict = varietyES_dict

In [7]:
# SET LANGUAGE TOKENIZER

from transformers import AutoTokenizer, PretrainedConfig

tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
vocab = tokenizer.get_vocab()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Datasets

In [8]:
from tools.DataLoaders import BasePAN17

baseTrain  = BasePAN17(Dir             = 'data/2017',
                      split            = 'train',
                      language         = _LANGUAGE_,
                      tokenizer        = tokenizer,
                      gender_dict      = gender_dict,
                      variety_dict     = variety_dict,
                      tweet_batch_size = 1,
                      max_seq_len      = _MAX_SEQ_LEN_,
                      preprocess_text  = _PREPROCESS_TEXT_)

baseTest  = BasePAN17(Dir              = 'data/2017',
                      split            = 'test',
                      language         = _LANGUAGE_,
                      tokenizer        = tokenizer,
                      gender_dict      = gender_dict,
                      variety_dict     = variety_dict,
                      tweet_batch_size = 1,
                      max_seq_len      = _MAX_SEQ_LEN_,
                      preprocess_text  = _PREPROCESS_TEXT_)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 360000


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 240000



In [9]:
crossVal_splits = []

for val_idx in range(_K_FOLD_CV_):
    
    authors_train, authors_val = baseTrain.cross_val(_K_FOLD_CV_, val_idx, _NUM_AUTHORS_)
    
    crossVal_splits.append( (authors_train, authors_val) )

In [10]:
from tools.DataLoaders import BasePAN17

baseTrain  = BasePAN17(Dir             = 'data/2017',
                      split            = 'train',
                      language         = _LANGUAGE_,
                      tokenizer        = tokenizer,
                      gender_dict      = gender_dict,
                      variety_dict     = variety_dict,
                      tweet_batch_size = _TWEET_BATCH_SIZE_,
                      max_seq_len      = _MAX_SEQ_LEN_,
                      preprocess_text  = _PREPROCESS_TEXT_)

baseTest  = BasePAN17(Dir              = 'data/2017',
                      split            = 'test',
                      language         = _LANGUAGE_,
                      tokenizer        = tokenizer,
                      gender_dict      = gender_dict,
                      variety_dict     = variety_dict,
                      tweet_batch_size = _TWEET_BATCH_SIZE_,
                      max_seq_len      = _MAX_SEQ_LEN_,
                      preprocess_text  = _PREPROCESS_TEXT_)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 72000


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 48000



In [11]:
from tools.DataLoaders import DatasetPAN17

Test = DatasetPAN17(baseTest, 'gender')

In [12]:
crossVal_splits[0][0]

['3d8285a6183b250bf7810f1110ebd408',
 '75369e6c54e6b643c7b5112fe484d048',
 'e15ff8259c2b18778594e47a4bce375a',
 'b2e5086a0e2f263f48ba1bec23dcc32',
 '7c61c34e980e22bda49e63f235a08c50',
 '5a61761418a8db2ccdff2b2aacc3a64e',
 'fa0d4331d8a79340d0720556f04dcc79',
 '8bbaf8237695dffe77a19e05d1bdc10c',
 '87c9d7b7da60b14596ee5d3067cb4986',
 'd87a5854cac462002e88ed0a2627a6cb',
 'bfddbb80aa7c83a342204903aeae6ef3',
 '571467b2067811d455f48a7de278615',
 '86b09df66b0d60ac38fbecc20d055718',
 '14a76fcf968fc12ce7765bceb677857e',
 '859396803d328b9624c346da27881926',
 '1fadbb99bb6cce129ab2bcb1d81a2667',
 'ce3a52089fd1e987bc51912ca4e7891b',
 '1d7765afa987ccb8f0467d6eae3a5e78',
 '827c2dc86e11b9db449cdeac7578830d',
 'faa853395e53925c7e3a99c71740dd0b',
 'ff26ace44cfced9f9b329afc23aaeda6',
 '1342f1fee84567cda4741b41a770b4e0',
 '71f48d1b1e4f84048347bf1a3a916e6',
 '31056ce770965577802c99efdc1c9f43',
 '337493831ca980b59bb5047f96228032',
 '2b3664ccd638ddf6def74cee446c1b0d',
 '99b223ff10578d0b88c2101447dafc79',
 '1b

## Training

In [13]:
from transformers import TrainingArguments

samples = 2 * _NUM_AUTHORS_ * int(100 / _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 10,
    overwrite_output_dir        = True,
    remove_unused_columns       = False,
)

In [14]:
from transformers import AutoAdapterModel
from tools.DataLoaders import DatasetCrossVal
from transformers import AdapterTrainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle

# initialize base model

model = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)


# train

task = 'gender'

f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):
    
    # loaders for current split ------------------------------------------
    
    authors_train, authors_val = crossVal_splits[split]
    
    Train = DatasetCrossVal(baseTrain, authors_train, task)
    Val   = DatasetCrossVal(baseTrain, authors_val  , task)
    
    
    # add adapter --------------------------------------------------------
    
    model.add_adapter(adapter_name = task,config = _ADAPTER_CONFIG_)
    model.add_classification_head(head_name = task, num_labels = 2)
    
    model.set_active_adapters(task)
    model.train_adapter(task)
    
    
    # create trainer and train -------------------------------------------
        
    trainer = AdapterTrainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()
    
    
    # get predictions ----------------------------------------------------
    
    results            = trainer.predict(Val)
    author_predictions = compute_author_predictions(Val, results.predictions, 'gender', 2)
    
    
    # report metrics -----------------------------------------------------
    
    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
               'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])
     
    
    # save predictions ----------------------------------------------------
    
    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/split_' + str(split + 1) + '/'
    if not os.path.exists(DIR):
        os.makedirs(DIR)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])
    
    
    # delete adapter -------------------------------------------------------
    
    model.delete_adapter(task)
    model.delete_head(task)

Some weights of the model checkpoint at pysentimiento/robertuito-base-uncased were not used when initializing RobertaAdapterModel: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAdapterModel were not initialized from the model checkpoint at pysentimiento/robertuito-base-uncased and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be 

Step,Training Loss
320,0.607
640,0.5508
960,0.5144
1280,0.4672
1600,0.416
1920,0.3615
2240,0.3158
2560,0.2763
2880,0.2355
3200,0.2144


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/c

100%|████████████████████████████████████████████████████████████████████████| 840/840 [00:03<00:00, 238.61it/s]


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.7488    0.7667    0.7576       420
           1     0.7610    0.7429    0.7518       420

    accuracy                         0.7548       840
   macro avg     0.7549    0.7548    0.7547       840
weighted avg     0.7549    0.7548    0.7547       840
 

hard voting:
               precision    recall  f1-score   support

           0     0.7383    0.7857    0.7612       420
           1     0.7710    0.7214    0.7454       420

    accuracy                         0.7536       840
   macro avg     0.7546    0.7536    0.7533       840
weighted avg     0.7546    0.7536    0.7533       840



Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 10240
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Step,Training Loss
320,0.6033
640,0.5402
960,0.4844
1280,0.4242
1600,0.3723
1920,0.3067
2240,0.2586
2560,0.2223
2880,0.1839
3200,0.1592


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/c

100%|████████████████████████████████████████████████████████████████████████| 840/840 [00:03<00:00, 230.29it/s]


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.7718    0.7167    0.7432       420
           1     0.7356    0.7881    0.7609       420

    accuracy                         0.7524       840
   macro avg     0.7537    0.7524    0.7521       840
weighted avg     0.7537    0.7524    0.7521       840
 

hard voting:
               precision    recall  f1-score   support

           0     0.7605    0.7333    0.7467       420
           1     0.7425    0.7690    0.7556       420

    accuracy                         0.7512       840
   macro avg     0.7515    0.7512    0.7511       840
weighted avg     0.7515    0.7512    0.7511       840



Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 10240
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Step,Training Loss
320,0.6279
640,0.5807
960,0.5368
1280,0.4895
1600,0.4383
1920,0.3772
2240,0.3315
2560,0.2837
2880,0.2516
3200,0.2246


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/c

100%|████████████████████████████████████████████████████████████████████████| 840/840 [00:03<00:00, 236.07it/s]


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.7526    0.6881    0.7189       420
           1     0.7127    0.7738    0.7420       420

    accuracy                         0.7310       840
   macro avg     0.7327    0.7310    0.7305       840
weighted avg     0.7327    0.7310    0.7305       840
 

hard voting:
               precision    recall  f1-score   support

           0     0.7395    0.7095    0.7242       420
           1     0.7208    0.7500    0.7351       420

    accuracy                         0.7298       840
   macro avg     0.7301    0.7298    0.7297       840
weighted avg     0.7301    0.7298    0.7297       840



Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 10240
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Step,Training Loss
320,0.6003
640,0.55
960,0.5102
1280,0.4604
1600,0.4056
1920,0.3451
2240,0.2998
2560,0.2569
2880,0.2182
3200,0.1922


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/c

100%|████████████████████████████████████████████████████████████████████████| 840/840 [00:03<00:00, 235.49it/s]


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.7468    0.8214    0.7823       420
           1     0.8016    0.7214    0.7594       420

    accuracy                         0.7714       840
   macro avg     0.7742    0.7714    0.7709       840
weighted avg     0.7742    0.7714    0.7709       840
 

hard voting:
               precision    recall  f1-score   support

           0     0.7396    0.8452    0.7889       420
           1     0.8194    0.7024    0.7564       420

    accuracy                         0.7738       840
   macro avg     0.7795    0.7738    0.7726       840
weighted avg     0.7795    0.7738    0.7726       840



Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 10240
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Step,Training Loss
320,0.62
640,0.5612
960,0.5106
1280,0.4457
1600,0.4004
1920,0.3366
2240,0.2906
2560,0.252
2880,0.2121
3200,0.1869


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/c

100%|████████████████████████████████████████████████████████████████████████| 840/840 [00:03<00:00, 231.36it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.7500    0.7571    0.7536       420
           1     0.7548    0.7476    0.7512       420

    accuracy                         0.7524       840
   macro avg     0.7524    0.7524    0.7524       840
weighted avg     0.7524    0.7524    0.7524       840
 

hard voting:
               precision    recall  f1-score   support

           0     0.7277    0.7952    0.7600       420
           1     0.7743    0.7024    0.7366       420

    accuracy                         0.7488       840
   macro avg     0.7510    0.7488    0.7483       840
weighted avg     0.7510    0.7488    0.7483       840






In [15]:
# report statistics

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

f1s_soft = np.array(f1s_soft)
f1s_hard = np.array(f1s_hard)

FewShot_Results = {'soft': [f1s_soft.mean(), f1s_soft.std()], 'hard': [f1s_hard.mean(), f1s_hard.std()]}

print('\n\nSoft statistics: ')
print('\t[avg, std]:', FewShot_Results['soft'])

print('\nHard statistics: ')
print('\t[avg, std]:', FewShot_Results['hard'])

Soft results:  [0.754727143869596, 0.7520647083865475, 0.7304573025284535, 0.7708557107053348, 0.7523753373092361]

Hard results:  [0.7533165643075116, 0.7511111111111111, 0.729651175155357, 0.7726495726495727, 0.7482668848240042]


Soft statistics: 
	[avg, std]: [0.7520960405598336, 0.012862051940220536]

Hard statistics: 
	[avg, std]: [0.7509990616095112, 0.013691786210634417]


## Test the best split

In [17]:
from transformers import AutoAdapterModel
from tools.DataLoaders import DatasetCrossVal, DatasetPAN17
from transformers import AdapterTrainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle

# initialize base model

model = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)


# train

task = 'gender'

split = 3

# loaders for current split ------------------------------------------
    
authors_train, authors_val = crossVal_splits[split]

Train = DatasetCrossVal(baseTrain, authors_train, task)
Val   = DatasetCrossVal(baseTrain, authors_val  , task)


# add adapter --------------------------------------------------------

model.add_adapter(adapter_name = task,config = _ADAPTER_CONFIG_)
model.add_classification_head(head_name = task, num_labels = 2)

model.set_active_adapters(task)
model.train_adapter(task)


# create trainer and train -------------------------------------------

trainer = AdapterTrainer(
    model           = model,
    args            = training_args,
    train_dataset   = Train,
)
trainer.args._n_gpu = _NO_GPUS_

trainer.train()


# get predictions ----------------------------------------------------

results            = trainer.predict(Test)
author_predictions = compute_author_predictions(baseTest, results.predictions, 'gender', 2)


# report metrics -----------------------------------------------------

report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
           'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

print("Results with split " + str(split + 1) + ":\n")
print("soft voting:\n", report['soft'], '\n')
print("hard voting:\n", report['hard'])


# delete adapter -------------------------------------------------------

model.delete_adapter(task)
model.delete_head(task)

loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
320,0.611
640,0.5576
960,0.5131
1280,0.4617
1600,0.4056
1920,0.346
2240,0.3045
2560,0.26
2880,0.2217
3200,0.1953


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/gender/head_config.json
Module weights saved in checkPoints/checkpoint-500/gender/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/gender/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/checkpoint-1000/gender/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/gender/head_config.json
Module weights saved in checkPoints/c

100%|██████████████████████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 143.16it/s]

Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.7521    0.8236    0.7862      1400
           1     0.8051    0.7286    0.7649      1400

    accuracy                         0.7761      2800
   macro avg     0.7786    0.7761    0.7756      2800
weighted avg     0.7786    0.7761    0.7756      2800
 

hard voting:
               precision    recall  f1-score   support

           0     0.7322    0.8379    0.7815      1400
           1     0.8105    0.6936    0.7475      1400

    accuracy                         0.7657      2800
   macro avg     0.7714    0.7657    0.7645      2800
weighted avg     0.7714    0.7657    0.7645      2800




