## Set Global Seed 

In [1]:
import os
import random
import numpy as np
import torch
import transformers

def set_all_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed = 260615
set_all_seeds(seed)

print("The global seed " + str(seed))

The global seed 260615


## Hyperparameters

In [2]:
# LANGUAGE

_LANGUAGE_         = 'en'
_DATASET_          = '2020'

In [3]:
# MODEL CLASSIFICATION

_PRETRAINED_LM_    = 'vinai/bertweet-base'
_PREPROCESS_TEXT_  = True
_TWEET_BATCH_SIZE_ = 5
_ADAPTER_CONFIG_   = transformers.ParallelConfig(reduction_factor = 256)
_MAX_SEQ_LEN_      = 128

In [4]:
# TRAIN

_OUTPUT_DIR_       = 'checkPointsParallel'
_LOGGING_STEPS_    = 50
_NUM_AUTHORS_      = 256
_K_FOLD_CV_        = 5
_NO_GPUS_          = 1
_BATCH_SIZE_       = int(32 / _NO_GPUS_)
_EPOCHS_           = 10
_LEARNING_RATE_    = 1e-4

# PREDICTIONS

_PRED_DIR_         = 'Parallel'

## Other parameters

In [5]:
# LABEL DICTONARIES -----------------------------------------------------------------------

# 2015

age_dict  = {'18-24': 0, '25-34': 1, '35-49': 2, '50-XX': 3}
ageEN_hyp = {0: '18-24', 1: '25-34', 2: '35-49', 3: '50-XX'}
ageES_hyp = {0: 'La edad de esta persona es entre 18 y 24 años', 
             1: 'La edad de esta persona es entre 25 y 34 años', 
             2: 'La edad de esta persona es entre 35 y 49 años', 
             3: 'La edad de esta persona es más de 50 años'}

# 2017

gender_dict    = {'female': 0, 'male':   1}
varietyEN_dict = {'australia': 0, 'canada': 1, 'great britain': 2, 'ireland': 3, 'new zealand': 4, 'united states': 5}
varietyES_dict = {'argentina': 0, 'chile': 1, 'colombia': 2, 'mexico': 3, 'peru': 4, 'spain': 5, 'venezuela': 6}  

genderEN_hyp  = {0: 'I’m a female', 1: 'I’m a male'}
genderES_hyp  = {0: 'Mi nombre es María', 1: 'Mi nombre es José'}

# 2019

bots_dict  = {'human': 0, 'bot': 1}
botsEN_hyp = {0: 'This is a text from a person', 1: 'This is a text from a machine'}
botsES_hyp = {0: 'Humano', 1: 'Bot'}

# 2020 

fakeNews_dict  = {'0': 0, '1': 1}
fakeNewsEN_hyp = {0: 'This author is a normal user', 1: 'This author spreads fake news'}
fakeNewsES_hyp = {0: 'Este autor es un usuario normal', 1: 'Este autor publica noticias falsas'}

# 2021

hateSpeech_dict  = {'0': 0, '1': 1}
hateSpeechEN_hyp = {0: 'This text does not contain hate speech', 1: 'This text expresses prejudice and hate speech'}
hateSpeechES_hyp = {0: 'Este texto es moderado, respetuoso, cortés y civilizado', 1: 'Este texto expresa odio o prejuicios'}

In [6]:
# SET LANGUAGE DICTIONARIES --------------------------------------------------

if _LANGUAGE_ == 'en':
    age_hyp        = ageEN_hyp
    gender_hyp     = genderEN_hyp
    variety_dict   = varietyEN_dict
    fakeNews_hyp   = fakeNewsEN_hyp
    hateSpeech_hyp = hateSpeechEN_hyp
    bots_hyp       = botsEN_hyp 

elif _LANGUAGE_ == 'es':
    age_hyp        = ageES_hyp
    gender_hyp     = genderES_hyp
    variety_dict   = varietyES_dict
    fakeNews_hyp   = fakeNewsES_hyp
    hateSpeech_hyp = hateSpeechES_hyp
    bots_hyp       = botsES_hyp
    
    
# SET LANGUAGE AND DATASET PARAMETERS ----------------------------------------
    
if   _DATASET_ == '2015':
    label_idx  = 2
    class_dict = age_dict
    label_name = 'age'
    label_hyp  = age_hyp
    
elif _DATASET_ == '2017':
    label_idx  = 1
    class_dict = gender_dict
    label_name = 'gender'
    label_hyp  = gender_hyp
    
elif _DATASET_ == '2019':
    label_idx  = 1
    class_dict = bots_dict
    label_name = 'bots'
    label_hyp  = bots_hyp
    
elif _DATASET_ == '2020':
    label_idx  = 1
    class_dict = fakeNews_dict
    label_name = 'fakeNews'
    label_hyp  = fakeNews_hyp
    
elif _DATASET_ == '2021':
    label_idx  = 1
    class_dict = hateSpeech_dict
    label_name = 'hateSpeech'
    label_hyp  = hateSpeech_hyp
    

In [7]:
# SET LANGUAGE TOKENIZER

from transformers import AutoTokenizer, PretrainedConfig

tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
vocab = tokenizer.get_vocab()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Datasets

In [8]:
# GET AUTHORS AND LABELS -----------------------------------------------------

from tools.DataLoaders import BasePAN

baseTrain  = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'train',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)

baseTest   = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'test',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)


In [9]:
# GET K-FOLD SPLITS -----------------------------------------------------

crossVal_splits = baseTrain.cross_val(_K_FOLD_CV_, _NUM_AUTHORS_)

crossVal_splits[0][0]

['e78b60f0-b114-4238-a0b2-dbd52ab12b99',
 'eb5cea6d-ecc7-4c1c-b149-dffb4e4d9373',
 'c65228f6-59a4-41a9-8d06-788406e5ff7e',
 'a1e96b2b-17eb-4450-911b-751a31fadf15',
 'ce325322-8731-426d-837a-60ea688fe82d',
 '541812a8-23ca-40a2-ba5a-2ae0908eada2',
 '23842323-5699-4551-9ff6-a684dee93a9a',
 'f1dcc4ff-0c7c-4fee-9a65-04a13285f327',
 'c2253559-eb87-44ac-9a0b-a1d1a0cdfd48',
 'ae414abd-cd83-4f0d-bc8c-d0a34a0071a3',
 '522a6f2f-d308-4b0e-ab31-0ab817e1f5d6',
 'f3af2ca2-dbfb-4447-a4f2-dfdfdda58640',
 '18fc8412-8068-4993-9382-f44d716f092e',
 'b7e9f372-21a8-461b-b4f0-950205f84da0',
 'dde4cfc8-f4cf-47be-8d9c-21b3b7f63098',
 '59c19daa-873b-434d-98d9-3b8c0168f946',
 'bb414002-cc26-4103-a00b-7f52a72efd59',
 '4b05f4e0-2b12-48f1-94c0-c55b4caf534c',
 '92defdbb-307e-4d13-b9ce-703f55adcf79',
 '1f17c4c3-0942-4fe5-8e03-5ad9f6a514d1',
 'fd7c89ad-deb5-4445-99c5-bf4c698ee371',
 'b1e938db-2e6c-46b5-a006-213bbada216b',
 '91c6d3f2-738c-4b86-8f1c-31a2311f987d',
 '1cd73ab3-e945-4334-ac09-7a1bf62559ab',
 '8398faf9-5d2b-

In [10]:
# GET TWEETS -----------------------------------------------------

baseTrain.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 2846



## Training and Testing

In [11]:
from tools.DataLoaders import DatasetPAN

baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

Test = DatasetPAN(baseTest, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 2645



In [12]:
from transformers import TrainingArguments

samples = 2 * _NUM_AUTHORS_ * int(100 / _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 10,
    overwrite_output_dir        = True,
    remove_unused_columns       = False,
)

In [13]:
from transformers import AutoAdapterModel
from tools.DataLoaders import DatasetCrossVal
from transformers import AdapterTrainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle

# initialize base model

model = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)


# train

task = label_name

f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):
    
    # loaders for current split ------------------------------------------
    
    authors_train, authors_val = crossVal_splits[split]
    
    Train = DatasetCrossVal(baseTrain, authors_train, task)
    Val   = DatasetCrossVal(baseTrain, authors_val  , task)
    
    
    # add adapter --------------------------------------------------------
    
    model.add_adapter(adapter_name = task,config = _ADAPTER_CONFIG_)
    model.add_classification_head(head_name = task, num_labels = len(class_dict))
    
    model.set_active_adapters(task)
    model.train_adapter(task)
    
    
    # create trainer and train -------------------------------------------
        
    trainer = AdapterTrainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()
    
    
    # get predictions ----------------------------------------------------
    
    results            = trainer.predict(Test)
    author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))
    
    
    # report metrics -----------------------------------------------------
    
    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
               'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])
     
    
    # save predictions ----------------------------------------------------
    
    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/test_split_' + str(split + 1) + '/'
    if not os.path.exists(DIR):
        os.makedirs(DIR)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])
    
    
    # delete adapter -------------------------------------------------------
    
    model.delete_adapter(task)
    model.delete_head(task)

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaAdapterModel: ['lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
***** Running training *****
  Num examples = 1694
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps 

Step,Training Loss
40,1.1979
80,0.9602
120,0.8235
160,0.7715
200,0.657
240,0.6234
280,0.5672
320,0.5205
360,0.4922
400,0.4778


Saving model checkpoint to checkPointsHoulsby/checkpoint-500
Configuration saved in checkPointsHoulsby/checkpoint-500/age/adapter_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_adapter.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 2441.25it/s]
Adding adapter 'age'.
Adding head 'age' with config {'head_type': 'classification', 'num_labels': 4, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2, 'LABEL_3': 3}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 1717
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 540


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.9273    0.9107    0.9189        56
           1     0.7536    0.8966    0.8189        58
           2     0.7857    0.5500    0.6471        20
           3     1.0000    0.5000    0.6667         8

    accuracy                         0.8310       142
   macro avg     0.8667    0.7143    0.7629       142
weighted avg     0.8405    0.8310    0.8256       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.9259    0.8929    0.9091        56
           1     0.7391    0.8793    0.8031        58
           2     0.7333    0.5500    0.6286        20
           3     1.0000    0.5000    0.6667         8

    accuracy                         0.8169       142
   macro avg     0.8496    0.7055    0.7519       142
weighted avg     0.8267    0.8169    0.8127       142



Step,Training Loss
40,1.2189
80,0.9438
120,0.8004
160,0.7536
200,0.6575
240,0.616
280,0.5783
320,0.5486
360,0.5057
400,0.4957


Saving model checkpoint to checkPointsHoulsby/checkpoint-500
Configuration saved in checkPointsHoulsby/checkpoint-500/age/adapter_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_adapter.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 2377.21it/s]
Adding adapter 'age'.
Adding head 'age' with config {'head_type': 'classification', 'num_labels': 4, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2, 'LABEL_3': 3}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 1720
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 540


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.9600    0.8571    0.9057        56
           1     0.7000    0.9655    0.8116        58
           2     0.8750    0.3500    0.5000        20
           3     1.0000    0.5000    0.6667         8

    accuracy                         0.8099       142
   macro avg     0.8838    0.6682    0.7210       142
weighted avg     0.8441    0.8099    0.7966       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.9057    0.8571    0.8807        56
           1     0.6974    0.9138    0.7910        58
           2     0.8889    0.4000    0.5517        20
           3     1.0000    0.5000    0.6667         8

    accuracy                         0.7958       142
   macro avg     0.8730    0.6677    0.7225       142
weighted avg     0.8235    0.7958    0.7857       142



Step,Training Loss
40,1.2005
80,0.9079
120,0.7762
160,0.6751
200,0.5857
240,0.5559
280,0.457
320,0.4529
360,0.4448
400,0.4065


Saving model checkpoint to checkPointsHoulsby/checkpoint-500
Configuration saved in checkPointsHoulsby/checkpoint-500/age/adapter_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_adapter.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 2317.15it/s]
Adding adapter 'age'.
Adding head 'age' with config {'head_type': 'classification', 'num_labels': 4, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2, 'LABEL_3': 3}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 1709
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 540


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.8667    0.9286    0.8966        56
           1     0.7705    0.8103    0.7899        58
           2     0.7857    0.5500    0.6471        20
           3     0.7143    0.6250    0.6667         8

    accuracy                         0.8099       142
   macro avg     0.7843    0.7285    0.7500       142
weighted avg     0.8074    0.8099    0.8049       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.8667    0.9286    0.8966        56
           1     0.7869    0.8276    0.8067        58
           2     0.7692    0.5000    0.6061        20
           3     0.6250    0.6250    0.6250         8

    accuracy                         0.8099       142
   macro avg     0.7619    0.7203    0.7336       142
weighted avg     0.8067    0.8099    0.8036       142



Step,Training Loss
40,1.2354
80,0.9851
120,0.8531
160,0.7518
200,0.6829
240,0.5852
280,0.6011
320,0.5285
360,0.4884
400,0.4897


Saving model checkpoint to checkPointsHoulsby/checkpoint-500
Configuration saved in checkPointsHoulsby/checkpoint-500/age/adapter_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_adapter.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 2250.12it/s]
Adding adapter 'age'.
Adding head 'age' with config {'head_type': 'classification', 'num_labels': 4, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2, 'LABEL_3': 3}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 1707
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 540


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.8226    0.9107    0.8644        56
           1     0.7031    0.7759    0.7377        58
           2     0.6667    0.4000    0.5000        20
           3     1.0000    0.5000    0.6667         8

    accuracy                         0.7606       142
   macro avg     0.7981    0.6466    0.6922       142
weighted avg     0.7618    0.7606    0.7502       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.8226    0.9107    0.8644        56
           1     0.6984    0.7586    0.7273        58
           2     0.6667    0.4000    0.5000        20
           3     0.8000    0.5000    0.6154         8

    accuracy                         0.7535       142
   macro avg     0.7469    0.6423    0.6768       142
weighted avg     0.7486    0.7535    0.7430       142



Step,Training Loss
40,1.2348
80,0.9861
120,0.845
160,0.7468
200,0.6723
240,0.6401
280,0.5788
320,0.5379
360,0.5086
400,0.4712


Saving model checkpoint to checkPointsHoulsby/checkpoint-500
Configuration saved in checkPointsHoulsby/checkpoint-500/age/adapter_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_adapter.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin
Configuration saved in checkPointsHoulsby/checkpoint-500/age/head_config.json
Module weights saved in checkPointsHoulsby/checkpoint-500/age/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 2386.88it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.9273    0.9107    0.9189        56
           1     0.7647    0.8966    0.8254        58
           2     0.7143    0.5000    0.5882        20
           3     1.0000    0.6250    0.7692         8

    accuracy                         0.8310       142
   macro avg     0.8516    0.7331    0.7754       142
weighted avg     0.8350    0.8310    0.8257       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.9107    0.9107    0.9107        56
           1     0.7429    0.8966    0.8125        58
           2     0.8182    0.4500    0.5806        20
           3     1.0000    0.6250    0.7692         8

    accuracy                         0.8239       142
   macro avg     0.8679    0.7206    0.7683       142
weighted avg     0.8342    0.8239    0.8161       142






In [14]:
# report statistics

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

f1s_soft = np.array(f1s_soft)
f1s_hard = np.array(f1s_hard)

FewShot_Results = {'soft': [f1s_soft.mean(), f1s_soft.std()], 'hard': [f1s_hard.mean(), f1s_hard.std()]}

print('\n\nSoft statistics: ')
print('\t[avg, std]:', FewShot_Results['soft'])

print('\nHard statistics: ')
print('\t[avg, std]:', FewShot_Results['hard'])

Soft results:  [0.7628855117275682, 0.7209803117309269, 0.7500482951801409, 0.6921945910901176, 0.7754454519160401]

Hard results:  [0.7518696526570542, 0.7225423814178081, 0.7335837548185419, 0.67676603057959, 0.7682725540588443]


Soft statistics: 
	[avg, std]: [0.7403108323289588, 0.030092249911903783]

Hard statistics: 
	[avg, std]: [0.7306068747063678, 0.031124085499019058]


## Training with all

In [11]:
from tools.DataLoaders import DatasetPAN

baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

Test  = DatasetPAN(baseTest , label_name)
Train = DatasetPAN(baseTrain, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 2645



In [12]:
from transformers import TrainingArguments

samples = 4 * _NUM_AUTHORS_ * int(100 / _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 5,
    overwrite_output_dir        = True,
    remove_unused_columns       = False,
)

In [13]:
from transformers import AutoAdapterModel
from tools.DataLoaders import DatasetCrossVal
from transformers import AdapterTrainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle

# initialize base model

model = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)


# train

task = label_name

# add adapter --------------------------------------------------------

model.add_adapter(adapter_name = task,config = _ADAPTER_CONFIG_)
model.add_classification_head(head_name = task, num_labels = len(class_dict))

model.set_active_adapters(task)
model.train_adapter(task)


# create trainer and train -------------------------------------------

trainer = AdapterTrainer(
    model           = model,
    args            = training_args,
    train_dataset   = Train,
)
trainer.args._n_gpu = _NO_GPUS_

trainer.train()


# get predictions ----------------------------------------------------

results            = trainer.predict(Test)
author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))


# report metrics -----------------------------------------------------

report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
           'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaAdapterModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
***** Running training *****
  Num examples = 2846
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps 

Step,Training Loss
640,0.6806


Saving model checkpoint to checkPointsParallel/checkpoint-500
Configuration saved in checkPointsParallel/checkpoint-500/age/adapter_config.json
Module weights saved in checkPointsParallel/checkpoint-500/age/pytorch_adapter.bin
Configuration saved in checkPointsParallel/checkpoint-500/age/head_config.json
Module weights saved in checkPointsParallel/checkpoint-500/age/pytorch_model_head.bin
Configuration saved in checkPointsParallel/checkpoint-500/age/head_config.json
Module weights saved in checkPointsParallel/checkpoint-500/age/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 2118.18it/s]


In [14]:

print("Results:\n")
print("soft voting:\n", report['soft'], '\n')
print("hard voting:\n", report['hard'])

Results:

soft voting:
               precision    recall  f1-score   support

           0     0.9091    0.8929    0.9009        56
           1     0.7324    0.8966    0.8062        58
           2     0.7000    0.3500    0.4667        20
           3     0.6667    0.5000    0.5714         8

    accuracy                         0.7958       142
   macro avg     0.7520    0.6599    0.6863       142
weighted avg     0.7938    0.7958    0.7825       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.9434    0.8929    0.9174        56
           1     0.7361    0.9138    0.8154        58
           2     0.7000    0.3500    0.4667        20
           3     0.7143    0.6250    0.6667         8

    accuracy                         0.8099       142
   macro avg     0.7734    0.6954    0.7165       142
weighted avg     0.8115    0.8099    0.7981       142

