## Set Global Seed 

In [1]:
import os
import random
import numpy as np
import torch
import transformers

def set_all_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus every CUDA device), then configures cuDNN for deterministic
    kernel selection so repeated runs start from the same state.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every GPU rather than only the current device;
    # it is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # would undermine the deterministic flag set above.
    torch.backends.cudnn.benchmark = False
    
# Seed once, up front, so the cells that follow start from a known RNG state.
seed = 260615
set_all_seeds(seed)

print(f"The global seed {seed}")

The global seed 260615


## Hyperparameters

In [2]:
# LANGUAGE
# Both values are plain strings; later cells compare against them to select the
# label dictionaries and concatenate them into the data / results paths.

_LANGUAGE_         = 'en'      # the language selection cell handles 'en' and 'es'
_DATASET_          = '2021'    # the dataset selection cell handles '2017', '2019', '2020' and '2021'

In [3]:
# MODEL CLASSIFICATION

_PRETRAINED_LM_    = 'vinai/bertweet-base'   # checkpoint name given to AutoTokenizer and AutoModelForSequenceClassification
_PREPROCESS_TEXT_  = True                    # forwarded to get_all_data(); presumably toggles tweet preprocessing — TODO confirm
_TWEET_BATCH_SIZE_ = 5                       # forwarded to get_all_data(); presumably tweets merged per instance — TODO confirm
# NOTE(review): not referenced by any cell visible in this notebook; ParallelConfig
# looks like it comes from an adapter-enabled transformers build — confirm it is still needed.
_ADAPTER_CONFIG_   = transformers.ParallelConfig(reduction_factor = 256)
_MAX_SEQ_LEN_      = 128                     # forwarded to get_all_data() together with the tokenizer

In [4]:
# TRAIN

_OUTPUT_DIR_       = 'checkPointsFE'         # passed to TrainingArguments as output_dir
_LOGGING_STEPS_    = 50                      # placeholder: recomputed from the sample count right before TrainingArguments is built
_NUM_AUTHORS_      = 64                      # second argument of cross_val(); presumably authors per class — TODO confirm
_K_FOLD_CV_        = 5                       # number of cross-validation splits the training loop iterates over
_NO_GPUS_          = 1                       # also written onto trainer.args._n_gpu in the training loop
_BATCH_SIZE_       = int(32 / _NO_GPUS_)     # 32 divided across the GPUs; used as per_device_train_batch_size
_EPOCHS_           = 10                      # passed to TrainingArguments as num_train_epochs
_LEARNING_RATE_    = 1e-4                    # passed to TrainingArguments as learning_rate

# PREDICTIONS

_PRED_DIR_         = 'FETestGeneralized'     # path component inside results/<dataset>/<language>/

## Other parameters

In [5]:
# LABEL DICTIONARIES -----------------------------------------------------------------------
# `*_dict` maps a label string to its integer class id; `*_hyp` maps a class id
# to a natural-language statement describing that class.

# 2017

gender_dict    = dict(female = 0, male = 1)
varietyEN_dict = {country: class_id for class_id, country in enumerate(
    ('australia', 'canada', 'great britain', 'ireland', 'new zealand', 'united states'))}
varietyES_dict = {country: class_id for class_id, country in enumerate(
    ('argentina', 'chile', 'colombia', 'mexico', 'peru', 'spain', 'venezuela'))}

genderEN_hyp  = dict(enumerate(('I’m a female', 'I’m a male')))
genderES_hyp  = dict(enumerate(('Mi nombre es María', 'Mi nombre es José')))

# 2019

bots_dict  = dict(human = 0, bot = 1)
botsEN_hyp = dict(enumerate(('This is a text from a person', 'This is a text from a machine')))
botsES_hyp = dict(enumerate(('Humano', 'Bot')))

# 2020

fakeNews_dict  = {str(class_id): class_id for class_id in range(2)}
fakeNewsEN_hyp = dict(enumerate(('This author is a normal user', 'This author spreads fake news')))
fakeNewsES_hyp = dict(enumerate(('Este autor es un usuario normal', 'Este autor publica noticias falsas')))

# 2021

hateSpeech_dict  = {str(class_id): class_id for class_id in range(2)}
hateSpeechEN_hyp = dict(enumerate(('This text does not contain hate speech',
                                   'This text expresses prejudice and hate speech')))
hateSpeechES_hyp = dict(enumerate(('Este texto es moderado, respetuoso, cortés y civilizado',
                                   'Este texto expresa odio o prejuicios')))

In [6]:
# SET LANGUAGE DICTIONARIES --------------------------------------------------
# language -> (gender_hyp, variety_dict, fakeNews_hyp, hateSpeech_hyp, bots_hyp)

_LANGUAGE_RESOURCES = {
    'en': (genderEN_hyp, varietyEN_dict, fakeNewsEN_hyp, hateSpeechEN_hyp, botsEN_hyp),
    'es': (genderES_hyp, varietyES_dict, fakeNewsES_hyp, hateSpeechES_hyp, botsES_hyp),
}

# Fail loudly on an unsupported value. Falling through silently would either
# surface as a NameError in a later cell or, when the cell is re-run in a live
# kernel, keep the selection left behind by a previous run.
if _LANGUAGE_ not in _LANGUAGE_RESOURCES:
    raise ValueError("Unsupported _LANGUAGE_ " + repr(_LANGUAGE_)
                     + "; expected one of " + str(sorted(_LANGUAGE_RESOURCES)))

gender_hyp, variety_dict, fakeNews_hyp, hateSpeech_hyp, bots_hyp = _LANGUAGE_RESOURCES[_LANGUAGE_]


# SET LANGUAGE AND DATASET PARAMETERS ----------------------------------------
# dataset year -> (class_dict, label_name, label_hyp)

_DATASET_TASKS = {
    '2017': (gender_dict,     'gender',     gender_hyp),
    '2019': (bots_dict,       'bots',       bots_hyp),
    '2020': (fakeNews_dict,   'fakeNews',   fakeNews_hyp),
    '2021': (hateSpeech_dict, 'hateSpeech', hateSpeech_hyp),
}

if _DATASET_ not in _DATASET_TASKS:
    raise ValueError("Unsupported _DATASET_ " + repr(_DATASET_)
                     + "; expected one of " + str(sorted(_DATASET_TASKS)))

# The label index is the same for every supported dataset; it is forwarded to
# BasePAN together with the task-specific values below.
label_idx = 1
class_dict, label_name, label_hyp = _DATASET_TASKS[_DATASET_]
    

In [7]:
# SET LANGUAGE TOKENIZER

from transformers import AutoTokenizer, PretrainedConfig

# Load the tokenizer that belongs to the pretrained checkpoint; it is handed to
# get_all_data() for both the train and the test corpus.
tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
# NOTE(review): `vocab` is not read by any cell visible here — confirm it is still needed.
vocab = tokenizer.get_vocab()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Datasets

In [8]:
# GET AUTHORS AND LABELS -----------------------------------------------------

from tools.DataLoaders import BasePAN

def _load_pan_split(split):
    """Build the BasePAN reader for one split of the configured dataset.

    Every argument except ``split`` comes from the notebook-level
    configuration, so the train and test readers cannot drift apart.
    """
    return BasePAN(Dir        = 'data/' + _DATASET_,
                   split      = split,
                   language   = _LANGUAGE_,
                   label_idx  = label_idx,
                   class_dict = class_dict,
                   label_name = label_name)


baseTrain = _load_pan_split('train')
baseTest  = _load_pan_split('test')

"\nbaseTest   = BasePAN(Dir        = 'data/' + _DATASET_,\n                     split      = 'test',\n                     language   = _LANGUAGE_,\n                     label_idx  = label_idx,\n                     class_dict = class_dict,\n                     label_name = label_name)\n"

In [9]:
# GET K-FOLD SPLITS -----------------------------------------------------

# Draw the cross-validation folds once; the training loop later unpacks each
# entry as (authors_train, authors_val).
crossVal_splits = baseTrain.cross_val(_K_FOLD_CV_, _NUM_AUTHORS_)

# Displayed value of the cell: the training authors of the first fold.
crossVal_splits[0][0]

['98c59ff3e2cfc56b96ee3c49b3d46296',
 'aa917a8c5a4420b024274672667c7dc1',
 '86a7f84c2dd126dac46270b6c912952c',
 '9cff4936f8479d53fcbb63f2524c5ad8',
 '30be488aa93e8000aadb952a9cd5143c',
 'a8e2397021acef98cc32729cbda96910',
 '3770a07b212c1096c26e5a1f1556fbd1',
 'b8924a54bb6043c56969e20a328b76b3',
 '258ba7b57bc38e4987f9f3cf23700ece',
 '26644d1348fc1122e8c5ef45d6bc84fa',
 '54f81e27af90ed7c1c9409c332f0ca37',
 '58584745632b5367da1c7a9af746222b',
 '7f269488a6576c9dc21085c1e2854142',
 '6711ef348ffcb3e45d2957396a4c8026',
 '3df768933d03108ea4c6583d49c85c46',
 '365eb1e3abc5cd5394fec8fc162bfbc5',
 'b496caf332cb0ba97d2acefc44f153ac',
 'fdef657f264ca50bc7b21574b24f82ab',
 '76e152a7732922e7a6da39880486107f',
 '4253c341c1069eded30b6efd2df89ddc',
 '4f496db1408c402eb21d29e536667205',
 '4ae4ddc8cb2774c92398e3102c3da5b2',
 'f3eecd0eedab3b77558d93b1b92579a4',
 '4a1baf66990e0e540effd01f4b105f44',
 '748c4b31797d62bcce99de35a681b484',
 'a887ec85088a87e550015e2770a6e309',
 '9d58d6313bfb2fba9e1e45bb9d65cf0b',
 

In [10]:
# GET TWEETS -----------------------------------------------------

# Read, preprocess, tokenize and merge the training tweets (the stages echoed in
# the cell output). The return value is discarded here, so the prepared data
# presumably stays on `baseTrain` — TODO confirm against tools.DataLoaders.
baseTrain.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 8000



## Training

In [11]:
from transformers import TrainingArguments

# Log once per `samples` training instances. The factor 2 and the 100 are
# hard-coded — presumably two classes and 100 tweets per author; TODO confirm.
instances_per_author = int(100 / _TWEET_BATCH_SIZE_)
samples = 2 * _NUM_AUTHORS_ * instances_per_author
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

# NOTE(review): no `seed` is passed, so TrainingArguments keeps its own default
# instead of the notebook-level `seed` — confirm this is intended.
trainer_settings = dict(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 10,
    overwrite_output_dir        = True,
    # keep every dataset column: the datasets here are custom objects, not
    # `datasets.Dataset` instances with a known schema
    remove_unused_columns       = False,
)
training_args = TrainingArguments(**trainer_settings)

In [12]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossVal
from transformers import Trainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle


# train
# K-fold cross-validation: for every split a fresh model is trained on the
# training authors and scored on the validation authors; the per-split macro F1
# values are collected for the statistics cell.

task = label_name

f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):

    # loaders for current split ------------------------------------------

    authors_train, authors_val = crossVal_splits[split]

    Train = DatasetCrossVal(baseTrain, authors_train, task)
    Val   = DatasetCrossVal(baseTrain, authors_val  , task)


    # initialize model ---------------------------------------------------
    # Reloaded from the pretrained checkpoint on every split so nothing learned
    # on one fold carries over to the next.

    model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_, num_labels = len(class_dict))

    # Feature extraction: only parameters whose name contains 'classifier' are
    # trained, everything else is frozen.
    for name, param in model.named_parameters():
        param.requires_grad = 'classifier' in name

    # create trainer and train -------------------------------------------

    trainer = Trainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    # NOTE(review): writes a private TrainingArguments attribute to pin the GPU
    # count; `trainer.args` is the shared `training_args` object.
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()


    # get predictions ----------------------------------------------------

    results            = trainer.predict(Val)
    author_predictions = compute_author_predictions(Val, results.predictions, task, len(class_dict))


    # report metrics -----------------------------------------------------

    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4),
              'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])


    # save predictions ----------------------------------------------------

    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/split_' + str(split + 1) + '/'
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(DIR, exist_ok = True)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])
    


Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

Step,Training Loss
80,0.687
160,0.6711
240,0.6527
320,0.6561
400,0.6424
480,0.6423
560,0.6387
640,0.6375
720,0.6177
800,0.6302


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1000
Configuration saved in checkPointsFE/checkpoint-1000/config.json
Model weights saved in checkPointsFE/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1500
Configuration saved in checkPointsFE/checkpoint-1500/config.json
Model weights saved in checkPointsFE/checkpoint-1500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 1600
  Batch size = 200


100%|████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 960.10it/s]


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.7200    0.9000    0.8000        20
           1     0.8667    0.6500    0.7429        20

    accuracy                         0.7750        40
   macro avg     0.7933    0.7750    0.7714        40
weighted avg     0.7933    0.7750    0.7714        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.6923    0.9000    0.7826        20
           1     0.8571    0.6000    0.7059        20

    accuracy                         0.7500        40
   macro avg     0.7747    0.7500    0.7442        40
weighted avg     0.7747    0.7500    0.7442        40



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6881
160,0.6629
240,0.6549
320,0.6443
400,0.6371
480,0.6405
560,0.6266
640,0.6346
720,0.6168
800,0.6215


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1000
Configuration saved in checkPointsFE/checkpoint-1000/config.json
Model weights saved in checkPointsFE/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1500
Configuration saved in checkPointsFE/checkpoint-1500/config.json
Model weights saved in checkPointsFE/checkpoint-1500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 1600
  Batch size = 200


100%|███████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 1060.73it/s]


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.6400    0.8000    0.7111        20
           1     0.7333    0.5500    0.6286        20

    accuracy                         0.6750        40
   macro avg     0.6867    0.6750    0.6698        40
weighted avg     0.6867    0.6750    0.6698        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.6154    0.8000    0.6957        20
           1     0.7143    0.5000    0.5882        20

    accuracy                         0.6500        40
   macro avg     0.6648    0.6500    0.6419        40
weighted avg     0.6648    0.6500    0.6419        40



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6812
160,0.6585
240,0.6452
320,0.6222
400,0.6258
480,0.6117
560,0.6039
640,0.6056
720,0.6002
800,0.5957


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1000
Configuration saved in checkPointsFE/checkpoint-1000/config.json
Model weights saved in checkPointsFE/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1500
Configuration saved in checkPointsFE/checkpoint-1500/config.json
Model weights saved in checkPointsFE/checkpoint-1500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 1600
  Batch size = 200


100%|███████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 1143.91it/s]


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.5263    0.5000    0.5128        20
           1     0.5238    0.5500    0.5366        20

    accuracy                         0.5250        40
   macro avg     0.5251    0.5250    0.5247        40
weighted avg     0.5251    0.5250    0.5247        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.5263    0.5000    0.5128        20
           1     0.5238    0.5500    0.5366        20

    accuracy                         0.5250        40
   macro avg     0.5251    0.5250    0.5247        40
weighted avg     0.5251    0.5250    0.5247        40



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6829
160,0.6635
240,0.6411
320,0.6382
400,0.6283
480,0.6264
560,0.6172
640,0.6121
720,0.6147
800,0.6101


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1000
Configuration saved in checkPointsFE/checkpoint-1000/config.json
Model weights saved in checkPointsFE/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1500
Configuration saved in checkPointsFE/checkpoint-1500/config.json
Model weights saved in checkPointsFE/checkpoint-1500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 1600
  Batch size = 200


100%|███████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 1028.92it/s]


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.7000    0.7000    0.7000        20
           1     0.7000    0.7000    0.7000        20

    accuracy                         0.7000        40
   macro avg     0.7000    0.7000    0.7000        40
weighted avg     0.7000    0.7000    0.7000        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.7500    0.7500    0.7500        20
           1     0.7500    0.7500    0.7500        20

    accuracy                         0.7500        40
   macro avg     0.7500    0.7500    0.7500        40
weighted avg     0.7500    0.7500    0.7500        40



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6846
160,0.6674
240,0.6502
320,0.6382
400,0.6345
480,0.6309
560,0.616
640,0.6314
720,0.6136
800,0.6158


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1000
Configuration saved in checkPointsFE/checkpoint-1000/config.json
Model weights saved in checkPointsFE/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to checkPointsFE/checkpoint-1500
Configuration saved in checkPointsFE/checkpoint-1500/config.json
Model weights saved in checkPointsFE/checkpoint-1500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 1600
  Batch size = 200


100%|████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 961.46it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.5500    0.5500    0.5500        20
           1     0.5500    0.5500    0.5500        20

    accuracy                         0.5500        40
   macro avg     0.5500    0.5500    0.5500        40
weighted avg     0.5500    0.5500    0.5500        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.5455    0.6000    0.5714        20
           1     0.5556    0.5000    0.5263        20

    accuracy                         0.5500        40
   macro avg     0.5505    0.5500    0.5489        40
weighted avg     0.5505    0.5500    0.5489        40






In [13]:
# report statistics
# Print the per-split macro F1 scores, then their mean and standard deviation
# for each voting scheme.

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

# From here on the score lists are NumPy arrays.
f1s_soft, f1s_hard = np.array(f1s_soft), np.array(f1s_hard)

FewShot_Results = {
    voting: [scores.mean(), scores.std()]
    for voting, scores in (('soft', f1s_soft), ('hard', f1s_hard))
}

print('\n\nSoft statistics: ')
print('\t[avg, std]:', FewShot_Results['soft'])

print('\nHard statistics: ')
print('\t[avg, std]:', FewShot_Results['hard'])

Soft results:  [0.7714285714285714, 0.6698412698412699, 0.5247029393370857, 0.7, 0.55]

Hard results:  [0.7442455242966751, 0.6419437340153452, 0.5247029393370857, 0.75, 0.5488721804511277]


Soft statistics: 
	[avg, std]: [0.6431945561213853, 0.09285080185646098]

Hard statistics: 
	[avg, std]: [0.6419528756200468, 0.09439229896036413]


## Training and Testing

In [11]:
from tools.DataLoaders import DatasetPAN

# Prepare the test corpus with the same arguments used for the training corpus.
baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

# Dataset wrapper over the whole test corpus; it is the object passed to
# trainer.predict() in the training-and-testing loop.
Test = DatasetPAN(baseTest, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 4000



In [12]:
from transformers import TrainingArguments

# Same training configuration as the cross-validation section: log once per
# `samples` training instances. The factor 2 and the 100 are hard-coded —
# presumably two classes and 100 tweets per author; TODO confirm.
instances_per_author = int(100 / _TWEET_BATCH_SIZE_)
samples = 2 * _NUM_AUTHORS_ * instances_per_author
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

# NOTE(review): no `seed` is passed, so TrainingArguments keeps its own default
# instead of the notebook-level `seed` — confirm this is intended.
trainer_settings = dict(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 10,
    overwrite_output_dir        = True,
    # keep every dataset column: the datasets here are custom objects, not
    # `datasets.Dataset` instances with a known schema
    remove_unused_columns       = False,
)
training_args = TrainingArguments(**trainer_settings)

In [13]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossVal
from transformers import Trainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle


# train
#
# For every cross-validation split: load a fresh pretrained model, train only its
# classification head on that split's training authors, predict on the shared
# Test set, print/collect the author-level metrics and save them to disk.

task = label_name

# Macro-F1 per split, one list per voting scheme; consumed by the statistics cell.
f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):

    # loaders for current split ------------------------------------------

    # Only the training authors are used in this cell: evaluation is done on Test,
    # so no validation dataset is built from authors_val.
    authors_train, authors_val = crossVal_splits[split]

    Train = DatasetCrossVal(baseTrain, authors_train, task)


    # initialize model ---------------------------------------------------

    model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_, num_labels = len(class_dict))

    # Freeze every parameter except those whose name contains 'classifier',
    # so only the classification head receives gradient updates.
    for name, param in model.named_parameters():
        param.requires_grad = 'classifier' in name

    # create trainer and train -------------------------------------------

    trainer = Trainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    # NOTE(review): writes a private TrainingArguments attribute, presumably to
    # restrict training to _NO_GPUS_ devices -- confirm this is still required.
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()


    # get predictions ----------------------------------------------------

    results            = trainer.predict(Test)
    author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))


    # report metrics -----------------------------------------------------

    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4),
              'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])


    # save predictions ----------------------------------------------------

    # results/<dataset>/<language>/<pred dir>/<N>_authors/test_split_<k>
    DIR = os.path.join('results', _DATASET_, _LANGUAGE_, _PRED_DIR_,
                       str(_NUM_AUTHORS_) + '_authors', 'test_split_' + str(split + 1))
    # exist_ok avoids the check-then-create race and makes re-runs idempotent.
    os.makedirs(DIR, exist_ok=True)

    with open(os.path.join(DIR, 'predictions.pickle'), 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(os.path.join(DIR, 'report.txt'), 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])
    


Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

Step,Training Loss
80,0.6523
160,0.5879
240,0.5541
320,0.5373
400,0.5133
480,0.5038
560,0.4943
640,0.5018
720,0.4955
800,0.4885


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 703.44it/s]


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.6923    0.7200    0.7059       100
           1     0.7083    0.6800    0.6939       100

    accuracy                         0.7000       200
   macro avg     0.7003    0.7000    0.6999       200
weighted avg     0.7003    0.7000    0.6999       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6852    0.7400    0.7115       100
           1     0.7174    0.6600    0.6875       100

    accuracy                         0.7000       200
   macro avg     0.7013    0.7000    0.6995       200
weighted avg     0.7013    0.7000    0.6995       200



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6632
160,0.6128
240,0.5776
320,0.5586
400,0.5391
480,0.5355
560,0.5222
640,0.515
720,0.5156
800,0.5121


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 983.60it/s]


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.6701    0.6500    0.6599       100
           1     0.6602    0.6800    0.6700       100

    accuracy                         0.6650       200
   macro avg     0.6651    0.6650    0.6649       200
weighted avg     0.6651    0.6650    0.6649       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6296    0.6800    0.6538       100
           1     0.6522    0.6000    0.6250       100

    accuracy                         0.6400       200
   macro avg     0.6409    0.6400    0.6394       200
weighted avg     0.6409    0.6400    0.6394       200



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6432
160,0.5801
240,0.5441
320,0.5303
400,0.5182
480,0.5072
560,0.5063
640,0.4937
720,0.5048
800,0.4908


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 744.56it/s]


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.6707    0.5500    0.6044       100
           1     0.6186    0.7300    0.6697       100

    accuracy                         0.6400       200
   macro avg     0.6447    0.6400    0.6371       200
weighted avg     0.6447    0.6400    0.6371       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6829    0.5600    0.6154       100
           1     0.6271    0.7400    0.6789       100

    accuracy                         0.6500       200
   macro avg     0.6550    0.6500    0.6471       200
weighted avg     0.6550    0.6500    0.6471       200



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6576
160,0.6005
240,0.5568
320,0.5398
400,0.522
480,0.5113
560,0.4987
640,0.497
720,0.4949
800,0.4921


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 770.54it/s]


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.6852    0.7400    0.7115       100
           1     0.7174    0.6600    0.6875       100

    accuracy                         0.7000       200
   macro avg     0.7013    0.7000    0.6995       200
weighted avg     0.7013    0.7000    0.6995       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6574    0.7100    0.6827       100
           1     0.6848    0.6300    0.6563       100

    accuracy                         0.6700       200
   macro avg     0.6711    0.6700    0.6695       200
weighted avg     0.6711    0.6700    0.6695       200



loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.21.3",
  "type_vocab

Step,Training Loss
80,0.6562
160,0.6072
240,0.5716
320,0.5588
400,0.5416
480,0.531
560,0.5267
640,0.5183
720,0.5234
800,0.5154


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 894.81it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.7333    0.5500    0.6286       100
           1     0.6400    0.8000    0.7111       100

    accuracy                         0.6750       200
   macro avg     0.6867    0.6750    0.6698       200
weighted avg     0.6867    0.6750    0.6698       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.7162    0.5300    0.6092       100
           1     0.6270    0.7900    0.6991       100

    accuracy                         0.6600       200
   macro avg     0.6716    0.6600    0.6542       200
weighted avg     0.6716    0.6600    0.6542       200






In [14]:
# report statistics

# Per-split macro-F1 scores gathered by the training cell, shown before aggregation.
print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

# Rebind both score lists as NumPy arrays so that .mean() / .std() are available.
f1s_soft, f1s_hard = np.array(f1s_soft), np.array(f1s_hard)

# For each voting scheme keep [mean, std] over the splits
# (ndarray.std() with its default arguments, i.e. the population standard deviation).
FewShot_Results = {
    voting: [scores.mean(), scores.std()]
    for voting, scores in (('soft', f1s_soft), ('hard', f1s_hard))
}

print('\n\nSoft statistics: ')
print('\t[avg, std]:', FewShot_Results['soft'])

print('\nHard statistics: ')
print('\t[avg, std]:', FewShot_Results['hard'])

Soft results:  [0.6998799519807923, 0.6649246080368083, 0.6370601875189031, 0.6995192307692307, 0.6698412698412699]

Hard results:  [0.6995192307692307, 0.639423076923077, 0.6471418489767113, 0.669471153846154, 0.6541552232733191]


Soft statistics: 
	[avg, std]: [0.6742450496294008, 0.023600467490854145]

Hard statistics: 
	[avg, std]: [0.6619421067576985, 0.021239193433432436]
