# Paquetes

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import transformers
import os

# Parámetros

In [8]:
# ------------------------------------------------------------------ MODEL / DATA SETTINGS

# Language code; the notebook only handles 'en' and 'es' when picking the variety labels.
_LANGUAGE_ = 'es'
# Checkpoint identifier used for both the tokenizer and the adapter model.
_PRETRAINED_LM_ = 'pysentimiento/robertuito-base-cased'
# Forwarded to BasePAN17 as `preprocess_text`.
_PREPROCESS_TEXT_ = True
# Forwarded to BasePAN17 as `tweet_batch_size`.
_TWEET_BATCH_SIZE_ = 1
# Adapter configuration handed to `model.add_adapter` for every task.
_ADAPTER_CONFIG_ = transformers.ParallelConfig()
# Forwarded to BasePAN17 as `max_seq_len`.
_MAX_SEQ_LEN_ = 128
# `output_dir` given to the training routine (checkpoints land below it).
_OUTPUT_DIR_ = 'parallel_adapter_checkPoints_es'
# `logging_steps` given to the training routine.
_LOGGING_STEPS_ = 50
# Prefix of the per-task directories written by `model.save_adapter`.
_SAVE_DIR_ = 'parallel_adapter_weights'


# ------------------------------------------------------------------ TRAINING SETTINGS

# `no_gpus` given to the training routine.
_NO_GPUS_ = 4
# `batch_size` given to the training routine.
_BATCH_SIZE_ = 100
# Number of epochs per task, keyed by task name.
_EPOCHS_ = dict(gender=8, variety=10)
# `learning_rate` given to the training routine.
_LEARNING_RATE_ = 1e-4

# Dataset

In [3]:
# LABEL DICTIONARIES -----------------------------------------------------------------------
# Every dictionary maps a class name to its integer label id.
# Ids follow the listing order, starting at 0.

gender_dict = dict(female=0, male=1)

# English-language varieties.
varietyEN_dict = {
    country: idx
    for idx, country in enumerate([
        'australia',
        'canada',
        'great britain',
        'ireland',
        'new zealand',
        'united states',
    ])
}

# Spanish-language varieties.
varietyES_dict = {
    country: idx
    for idx, country in enumerate([
        'argentina',
        'chile',
        'colombia',
        'mexico',
        'peru',
        'spain',
        'venezuela',
    ])
}

In [4]:
# SET LANGUAGE DICTIONARY
# Select the variety label dictionary that corresponds to _LANGUAGE_.

if _LANGUAGE_ == 'en':
    variety_dict = varietyEN_dict

elif _LANGUAGE_ == 'es':
    variety_dict = varietyES_dict

else:
    # Fail fast: without this branch an unsupported code would leave
    # `variety_dict` undefined (or stale from an earlier kernel run) and the
    # error would only surface later, far from its cause.
    raise ValueError(
        f"Unsupported _LANGUAGE_ {_LANGUAGE_!r}; expected 'en' or 'es'"
    )

In [5]:
# SET LANGUAGE TOKENIZER
# Load the tokenizer that belongs to the checkpoint named in _PRETRAINED_LM_.

from transformers import AutoTokenizer


tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
    
# Vocabulary of the tokenizer; not referenced again by the visible cells.
vocab = tokenizer.get_vocab()

In [6]:
# CREATE ONE INSTANCE PER DATA SPLIT
# Both bases share every setting except `split`; the generator builds the
# train base first and the test base second, in that order.

from DatasetPAN17 import BasePAN17, DatasetPAN17

baseTrain, baseTest = (
    BasePAN17(
        Dir='data',
        split=split_name,
        language=_LANGUAGE_,
        tokenizer=tokenizer,
        gender_dict=gender_dict,
        variety_dict=variety_dict,
        tweet_batch_size=_TWEET_BATCH_SIZE_,
        max_seq_len=_MAX_SEQ_LEN_,
        preprocess_text=_PREPROCESS_TEXT_,
    )
    for split_name in ('train', 'test')
)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done

Total Instances: 419998


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done

Total Instances: 280000



In [7]:
# CREATE ONE TRAINING DATASET PER TASK
# Each task gets its own DatasetPAN17 view over the shared training base,
# keyed by the task name.

tasks = ['gender', 'variety']

dataset_dict = {
    task_label: DatasetPAN17(Base_Dataset=baseTrain, label=task_label)
    for task_label in tasks
}

# Model

In [9]:
# Load the pretrained checkpoint named in _PRETRAINED_LM_ as an adapter-capable
# model; the task adapters and classification heads are attached in a later cell.
from transformers import AutoAdapterModel


model = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)


Some weights of the model checkpoint at pysentimiento/robertuito-base-cased were not used when initializing RobertaAdapterModel: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAdapterModel were not initialized from the model checkpoint at pysentimiento/robertuito-base-cased and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able

# Adapters

In [10]:
# Number of output classes for each task's classification head.
num_v = len(baseTrain.variety_dict)
num_labels_dict = dict(gender=2, variety=num_v)

# Register one adapter and one classification head per task, both named after the task.
for task_name in tasks:
    model.add_adapter(adapter_name=task_name, config=_ADAPTER_CONFIG_)
    model.add_classification_head(head_name=task_name, num_labels=num_labels_dict[task_name])

# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

model = model.to(device)

# Training

In [11]:
# Train the adapters using the project-local training routine.
# `dataset_dict` and `epochs` (_EPOCHS_) are both keyed by task name; the
# remaining arguments come straight from the parameter cell.
# NOTE(review): the captured log suggests tasks are trained one after another
# (gender, then variety) — confirm against Training.py.
from Training import train_model_with_adapters

train_model_with_adapters(model         = model,
                          dataset_dict  = dataset_dict,
                          epochs        = _EPOCHS_,
                          batch_size    = _BATCH_SIZE_,
                          no_gpus       = _NO_GPUS_,
                          output_dir    = _OUTPUT_DIR_,
                          logging_steps = _LOGGING_STEPS_,
                          learning_rate = _LEARNING_RATE_)

***** Running training *****
  Num examples = 419998
  Num Epochs = 8
  Instantaneous batch size per device = 100
  Total train batch size (w. parallel, distributed & accumulation) = 400
  Gradient Accumulation steps = 1
  Total optimization steps = 8400


Step,Training Loss
50,0.6576
100,0.6447
150,0.6334
200,0.6222
250,0.6281
300,0.6229
350,0.6167
400,0.6197
450,0.6137
500,0.6096


Saving model checkpoint to parallel_adapter_checkPoints_es/gender/checkpoint-500
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-500/ge

Saving model checkpoint to parallel_adapter_checkPoints_es/gender/checkpoint-3000
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-3000/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoi

Saving model checkpoint to parallel_adapter_checkPoints_es/gender/checkpoint-5500
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-5500/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoi

Saving model checkpoint to parallel_adapter_checkPoints_es/gender/checkpoint-8000
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/gender/checkpoint-8000/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/gender/checkpoi

Step,Training Loss
50,1.5549
100,1.4087
150,1.4003
200,1.3669
250,1.3525
300,1.3613
350,1.3287
400,1.3352
450,1.3302
500,1.3154


Saving model checkpoint to parallel_adapter_checkPoints_es/variety/checkpoint-500
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-500/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpo

Saving model checkpoint to parallel_adapter_checkPoints_es/variety/checkpoint-3000
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-3000/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variet

Saving model checkpoint to parallel_adapter_checkPoints_es/variety/checkpoint-5500
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-5500/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variet

Saving model checkpoint to parallel_adapter_checkPoints_es/variety/checkpoint-8000
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-8000/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variet

Saving model checkpoint to parallel_adapter_checkPoints_es/variety/checkpoint-10500
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/gender/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/gender/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/variety/adapter_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/variety/head_config.json
Module weights saved in parallel_adapter_checkPoints_es/variety/checkpoint-10500/variety/pytorch_model_head.bin
Configuration saved in parallel_adapter_checkPoints_

# Testing

In [12]:
# Evaluate the trained model on the test base with the project-local test routine.
# The displayed result is a dict with 'gender', 'variety' and 'joint' entries.
from TestingPAN17 import test_model_with_adapters

accuracy = test_model_with_adapters(model, baseTest)

100%|█████████████████████████████████████| 2800/2800 [1:04:13<00:00,  1.38s/it]


In [13]:
# Show the accuracy figures returned by the test run.
accuracy

{'gender': 0.8007142857142857,
 'variety': 0.9364285714285714,
 'joint': 0.7542857142857143}

## Guardar adapter

In [14]:
# Write each task's adapter to its own directory, "<_SAVE_DIR_>_<task>".
for task in tasks:
    adapter_dir = f'{_SAVE_DIR_}_{task}'
    model.save_adapter(save_directory=adapter_dir, adapter_name=task)

Configuration saved in parallel_adapter_weights_gender/adapter_config.json
Module weights saved in parallel_adapter_weights_gender/pytorch_adapter.bin
Configuration saved in parallel_adapter_weights_gender/head_config.json
Module weights saved in parallel_adapter_weights_gender/pytorch_model_head.bin
Configuration saved in parallel_adapter_weights_variety/adapter_config.json
Module weights saved in parallel_adapter_weights_variety/pytorch_adapter.bin
Configuration saved in parallel_adapter_weights_variety/head_config.json
Module weights saved in parallel_adapter_weights_variety/pytorch_model_head.bin
