## Set Global Seed 

In [1]:
import os
import random
import numpy as np
import torch
import transformers

def set_all_seeds(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's `random`, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), and puts cuDNN into its deterministic
    configuration.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every CUDA device, not just the current one;
    # it is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run,
    # so keep it off when reproducibility is the goal.
    torch.backends.cudnn.benchmark = False
    
# Seed all generators once, before any model or data object is created.
seed = 260615
set_all_seeds(seed)

print(f"The global seed {seed}")

The global seed 260615


## Hyperparameters

In [2]:
# LANGUAGE

# Corpus language; the dictionary-selection cell further down handles 'en' and 'es'.
_LANGUAGE_         = 'en'
# Dataset edition; the parameter-selection cell further down handles
# '2017', '2019', '2020' and '2021'. Also used to build the 'data/<edition>' path.
_DATASET_          = '2017'

In [3]:
# MODEL CLASSIFICATION

# Checkpoint name passed to AutoTokenizer.from_pretrained and AutoAdapterModel.from_pretrained.
_PRETRAINED_LM_    = 'vinai/bertweet-base'
# Forwarded (with _TWEET_BATCH_SIZE_ and _MAX_SEQ_LEN_) to BasePAN.get_all_data.
_PREPROCESS_TEXT_  = True
# NOTE(review): presumably the number of tweets merged into one instance — confirm in tools.DataLoaders.
_TWEET_BATCH_SIZE_ = 5
# Adapter configuration handed to model.add_adapter inside the training loops.
_ADAPTER_CONFIG_   = transformers.ParallelConfig(reduction_factor = 256)
# NOTE(review): presumably the maximum tokenized sequence length — confirm in tools.DataLoaders.
_MAX_SEQ_LEN_      = 128

In [4]:
# TRAIN

# Checkpoint directory given to TrainingArguments(output_dir=...).
_OUTPUT_DIR_       = 'checkPoints'
# Placeholder only: the TrainingArguments cells recompute _LOGGING_STEPS_ before using it.
_LOGGING_STEPS_    = 50
# Passed to baseTrain.cross_val and used in the results directory name.
# NOTE(review): looks like a per-class author count (the logging-step formula multiplies it by 2) — confirm.
_NUM_AUTHORS_      = 64
# Number of cross-validation folds; both training loops iterate range(_K_FOLD_CV_).
_K_FOLD_CV_        = 5
# GPU count; divides the batch size below and is written to trainer.args._n_gpu.
_NO_GPUS_          = 1
# Per-device batch size: 32 split across the GPUs, truncated to an int.
_BATCH_SIZE_       = int(32 / _NO_GPUS_)
_EPOCHS_           = 10
_LEARNING_RATE_    = 1e-4

# PREDICTIONS

# Sub-directory of results/<dataset>/<language>/ where predictions and reports are written.
_PRED_DIR_         = 'ParallelTestGeneralized'

## Other parameters

In [5]:
# LABEL DICTIONARIES -----------------------------------------------------------------------
#
# Two kinds of tables are defined per dataset edition:
#   *_dict : raw label string -> integer class id (one of them becomes `class_dict`)
#   *_hyp  : integer class id -> natural-language sentence for that class
#            (one of them becomes `label_hyp`; it is not consumed in the visible part
#            of this notebook)

# 2017

gender_dict    = {'female': 0, 'male':   1}
varietyEN_dict = {'australia': 0, 'canada': 1, 'great britain': 2, 'ireland': 3, 'new zealand': 4, 'united states': 5}
varietyES_dict = {'argentina': 0, 'chile': 1, 'colombia': 2, 'mexico': 3, 'peru': 4, 'spain': 5, 'venezuela': 6}  

genderEN_hyp  = {0: 'I’m a female', 1: 'I’m a male'}
genderES_hyp  = {0: 'Mi nombre es María', 1: 'Mi nombre es José'}

# 2019

bots_dict  = {'human': 0, 'bot': 1}
botsEN_hyp = {0: 'This is a text from a person', 1: 'This is a text from a machine'}
botsES_hyp = {0: 'Humano', 1: 'Bot'}

# 2020 

# Raw labels for 2020 and 2021 are the strings '0' / '1', not integers.
fakeNews_dict  = {'0': 0, '1': 1}
fakeNewsEN_hyp = {0: 'This author is a normal user', 1: 'This author spreads fake news'}
fakeNewsES_hyp = {0: 'Este autor es un usuario normal', 1: 'Este autor publica noticias falsas'}

# 2021

hateSpeech_dict  = {'0': 0, '1': 1}
hateSpeechEN_hyp = {0: 'This text does not contain hate speech', 1: 'This text expresses prejudice and hate speech'}
hateSpeechES_hyp = {0: 'Este texto es moderado, respetuoso, cortés y civilizado', 1: 'Este texto expresa odio o prejuicios'}

In [6]:
# SET LANGUAGE DICTIONARIES --------------------------------------------------

# Bind the language-neutral names (gender_hyp, variety_dict, ...) to the
# tables of the configured language.
if _LANGUAGE_ == 'en':
    gender_hyp     = genderEN_hyp
    variety_dict   = varietyEN_dict
    fakeNews_hyp   = fakeNewsEN_hyp
    hateSpeech_hyp = hateSpeechEN_hyp
    bots_hyp       = botsEN_hyp

elif _LANGUAGE_ == 'es':
    gender_hyp     = genderES_hyp
    variety_dict   = varietyES_dict
    fakeNews_hyp   = fakeNewsES_hyp
    hateSpeech_hyp = hateSpeechES_hyp
    bots_hyp       = botsES_hyp

else:
    # Fail here rather than later: without this branch an unsupported value
    # leaves the names above undefined, or silently stale from a previous run
    # of this cell in the same kernel.
    raise ValueError("Unsupported _LANGUAGE_ " + repr(_LANGUAGE_) + "; expected 'en' or 'es'")
    
    
# SET LANGUAGE AND DATASET PARAMETERS ----------------------------------------
    
# Bind the task-specific parameters used by the data loaders and the
# training loops: class_dict (raw label -> class id), label_name (also the
# adapter/head name) and label_hyp (class id -> sentence).

# Every supported dataset edition uses the same label index.
label_idx = 1

if   _DATASET_ == '2017':
    class_dict = gender_dict
    label_name = 'gender'
    label_hyp  = gender_hyp

elif _DATASET_ == '2019':
    class_dict = bots_dict
    label_name = 'bots'
    label_hyp  = bots_hyp

elif _DATASET_ == '2020':
    class_dict = fakeNews_dict
    label_name = 'fakeNews'
    label_hyp  = fakeNews_hyp

elif _DATASET_ == '2021':
    class_dict = hateSpeech_dict
    label_name = 'hateSpeech'
    label_hyp  = hateSpeech_hyp

else:
    # Fail here rather than later: without this branch an unsupported value
    # leaves class_dict / label_name / label_hyp undefined, or silently stale
    # from a previous run of this cell in the same kernel.
    raise ValueError("Unsupported _DATASET_ " + repr(_DATASET_)
                     + "; expected '2017', '2019', '2020' or '2021'")
    

In [7]:
# SET LANGUAGE TOKENIZER

from transformers import AutoTokenizer, PretrainedConfig

# Tokenizer matching the pre-trained checkpoint; it is later passed to get_all_data.
tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
# NOTE(review): `vocab` is not referenced again in the visible part of this notebook.
vocab = tokenizer.get_vocab()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Datasets

In [8]:
# GET AUTHORS AND LABELS -----------------------------------------------------

from tools.DataLoaders import BasePAN

# Training split of the configured dataset edition and language.
baseTrain  = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'train',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)
# NOTE(review): the block below is a bare string literal, not executable code,
# so `baseTest` is NOT created by this cell. Because the string is the cell's
# last expression, the notebook echoes it as the cell output.
"""
baseTest   = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'test',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)
"""

"\nbaseTest   = BasePAN(Dir        = 'data/' + _DATASET_,\n                     split      = 'test',\n                     language   = _LANGUAGE_,\n                     label_idx  = label_idx,\n                     class_dict = class_dict,\n                     label_name = label_name)\n"

In [9]:
# GET K-FOLD SPLITS -----------------------------------------------------

# One entry per fold; the training loops unpack each entry as
# (authors_train, authors_val).
crossVal_splits = baseTrain.cross_val(_K_FOLD_CV_, _NUM_AUTHORS_)

# Display the training-author ids of the first fold as a sanity check.
# NOTE(review): this prints the full id list; consider slicing it to keep the output short.
crossVal_splits[0][0]

['98c59ff3e2cfc56b96ee3c49b3d46296',
 'aa917a8c5a4420b024274672667c7dc1',
 '86a7f84c2dd126dac46270b6c912952c',
 '9cff4936f8479d53fcbb63f2524c5ad8',
 '30be488aa93e8000aadb952a9cd5143c',
 'a8e2397021acef98cc32729cbda96910',
 '3770a07b212c1096c26e5a1f1556fbd1',
 'b8924a54bb6043c56969e20a328b76b3',
 '258ba7b57bc38e4987f9f3cf23700ece',
 '26644d1348fc1122e8c5ef45d6bc84fa',
 '54f81e27af90ed7c1c9409c332f0ca37',
 '58584745632b5367da1c7a9af746222b',
 '7f269488a6576c9dc21085c1e2854142',
 '6711ef348ffcb3e45d2957396a4c8026',
 '3df768933d03108ea4c6583d49c85c46',
 '365eb1e3abc5cd5394fec8fc162bfbc5',
 'b496caf332cb0ba97d2acefc44f153ac',
 'fdef657f264ca50bc7b21574b24f82ab',
 '76e152a7732922e7a6da39880486107f',
 '4253c341c1069eded30b6efd2df89ddc',
 '4f496db1408c402eb21d29e536667205',
 '4ae4ddc8cb2774c92398e3102c3da5b2',
 'f3eecd0eedab3b77558d93b1b92579a4',
 '4a1baf66990e0e540effd01f4b105f44',
 '748c4b31797d62bcce99de35a681b484',
 'a887ec85088a87e550015e2770a6e309',
 '9d58d6313bfb2fba9e1e45bb9d65cf0b',
 

In [10]:
# GET TWEETS -----------------------------------------------------

# Load the training split into baseTrain; judging by the progress messages it
# prints, this reads, preprocesses, tokenizes and merges the data.
baseTrain.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 8000



## Training (k-fold cross-validation)

In [11]:
from transformers import TrainingArguments

# Logging interval: the number of optimisation steps needed to consume
# `samples` instances at the configured batch size.
# NOTE(review): 2 and 100 look like "number of classes" and "tweets per author" — confirm.
samples = 2 * _NUM_AUTHORS_ * (100 // _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = samples // _BATCH_SIZE_

training_args = TrainingArguments(
    # where checkpoints go
    output_dir                  = _OUTPUT_DIR_,
    overwrite_output_dir        = True,
    save_total_limit            = 10,
    # optimisation schedule
    num_train_epochs            = _EPOCHS_,
    learning_rate               = _LEARNING_RATE_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    # reporting / data handling
    logging_steps               = _LOGGING_STEPS_,
    remove_unused_columns       = False,
)

In [12]:
from transformers import AutoAdapterModel
from tools.DataLoaders import DatasetCrossVal
from transformers import AdapterTrainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle

# K-fold cross-validation: for every fold, train a fresh adapter + classification
# head on the fold's training authors, predict on its validation authors,
# aggregate predictions per author, report metrics and save them to disk.

# initialize base model (shared by all folds; adapters are added/removed per fold)

model = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)


# train

task = label_name

# Macro-F1 per fold, for the two author-level voting schemes.
f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):

    # loaders for current split ------------------------------------------

    authors_train, authors_val = crossVal_splits[split]

    Train = DatasetCrossVal(baseTrain, authors_train, task)
    Val   = DatasetCrossVal(baseTrain, authors_val  , task)


    # add adapter --------------------------------------------------------

    # A new adapter and head are registered under the task name for every fold;
    # they are deleted again at the end of the iteration.
    model.add_adapter(adapter_name = task,config = _ADAPTER_CONFIG_)
    model.add_classification_head(head_name = task, num_labels = len(class_dict))

    model.set_active_adapters(task)
    # Per the adapter-transformers docs, train_adapter freezes the pre-trained
    # weights so that only the adapter (and head) are updated.
    model.train_adapter(task)


    # create trainer and train -------------------------------------------

    trainer = AdapterTrainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    # Overrides the trainer's private GPU count with the configured value.
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()


    # get predictions ----------------------------------------------------

    results            = trainer.predict(Val)
    # Result is indexed below with the keys 'true', 'pred_soft' and 'pred_hard'.
    author_predictions = compute_author_predictions(Val, results.predictions, task, len(class_dict))


    # report metrics -----------------------------------------------------

    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4),
               'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])


    # save predictions ----------------------------------------------------

    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/split_' + str(split + 1) + '/'
    # exist_ok avoids the check-then-create race of `if not exists: makedirs`.
    os.makedirs(DIR, exist_ok=True)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])


    # delete adapter -------------------------------------------------------

    # Remove this fold's adapter and head so the next fold can re-add them
    # under the same name.
    model.delete_adapter(task)
    model.delete_head(task)

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaAdapterModel: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
***** Running training *****
  Num examples = 5120
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps 

Step,Training Loss
80,0.6878
160,0.65
240,0.6161
320,0.6016
400,0.5946
480,0.5834
560,0.5648
640,0.5702
720,0.5418
800,0.5514


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_confi

100%|███████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 1026.16it/s]
Adding adapter 'hateSpeech'.
Adding head 'hateSpeech' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 5120
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.7391    0.8500    0.7907        20
           1     0.8235    0.7000    0.7568        20

    accuracy                         0.7750        40
   macro avg     0.7813    0.7750    0.7737        40
weighted avg     0.7813    0.7750    0.7737        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.7083    0.8500    0.7727        20
           1     0.8125    0.6500    0.7222        20

    accuracy                         0.7500        40
   macro avg     0.7604    0.7500    0.7475        40
weighted avg     0.7604    0.7500    0.7475        40



Step,Training Loss
80,0.6862
160,0.6314
240,0.6038
320,0.6021
400,0.5806
480,0.5767
560,0.5617
640,0.5691
720,0.5494
800,0.5391


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_confi

100%|███████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 1017.73it/s]
Adding adapter 'hateSpeech'.
Adding head 'hateSpeech' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 5120
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.7143    0.7500    0.7317        20
           1     0.7368    0.7000    0.7179        20

    accuracy                         0.7250        40
   macro avg     0.7256    0.7250    0.7248        40
weighted avg     0.7256    0.7250    0.7248        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.7143    0.7500    0.7317        20
           1     0.7368    0.7000    0.7179        20

    accuracy                         0.7250        40
   macro avg     0.7256    0.7250    0.7248        40
weighted avg     0.7256    0.7250    0.7248        40



Step,Training Loss
80,0.6724
160,0.6167
240,0.5907
320,0.5679
400,0.5713
480,0.5526
560,0.544
640,0.5534
720,0.536
800,0.5289


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_confi

100%|████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 848.77it/s]
Adding adapter 'hateSpeech'.
Adding head 'hateSpeech' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 5120
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.5556    0.5000    0.5263        20
           1     0.5455    0.6000    0.5714        20

    accuracy                         0.5500        40
   macro avg     0.5505    0.5500    0.5489        40
weighted avg     0.5505    0.5500    0.5489        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.6000    0.6000    0.6000        20
           1     0.6000    0.6000    0.6000        20

    accuracy                         0.6000        40
   macro avg     0.6000    0.6000    0.6000        40
weighted avg     0.6000    0.6000    0.6000        40



Step,Training Loss
80,0.6785
160,0.6271
240,0.5951
320,0.5929
400,0.5744
480,0.558
560,0.5527
640,0.5405
720,0.543
800,0.5102


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_confi

100%|███████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 1017.50it/s]
Adding adapter 'hateSpeech'.
Adding head 'hateSpeech' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 5120
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.7273    0.8000    0.7619        20
           1     0.7778    0.7000    0.7368        20

    accuracy                         0.7500        40
   macro avg     0.7525    0.7500    0.7494        40
weighted avg     0.7525    0.7500    0.7494        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.7273    0.8000    0.7619        20
           1     0.7778    0.7000    0.7368        20

    accuracy                         0.7500        40
   macro avg     0.7525    0.7500    0.7494        40
weighted avg     0.7525    0.7500    0.7494        40



Step,Training Loss
80,0.6839
160,0.6358
240,0.6082
320,0.5915
400,0.5796
480,0.5758
560,0.5566
640,0.5801
720,0.5482
800,0.5391


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-500/hateSpeech/pytorch_model_head.bin
Saving model checkpoint to checkPoints/checkpoint-1000
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/adapter_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_config.json
Module weights saved in checkPoints/checkpoint-1000/hateSpeech/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-1000/hateSpeech/head_confi

100%|███████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 1041.77it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.6471    0.5500    0.5946        20
           1     0.6087    0.7000    0.6512        20

    accuracy                         0.6250        40
   macro avg     0.6279    0.6250    0.6229        40
weighted avg     0.6279    0.6250    0.6229        40
 

hard voting:
               precision    recall  f1-score   support

           0     0.6111    0.5500    0.5789        20
           1     0.5909    0.6500    0.6190        20

    accuracy                         0.6000        40
   macro avg     0.6010    0.6000    0.5990        40
weighted avg     0.6010    0.6000    0.5990        40






In [13]:
# Summarise the per-fold macro-F1 scores: raw values first, then mean and
# (population) standard deviation for each voting scheme.

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

# From here on the score lists are NumPy arrays.
f1s_soft, f1s_hard = np.array(f1s_soft), np.array(f1s_hard)

FewShot_Results = {
    voting: [scores.mean(), scores.std()]
    for voting, scores in (('soft', f1s_soft), ('hard', f1s_hard))
}

for heading, voting in (('\n\nSoft statistics: ', 'soft'), ('\nHard statistics: ', 'hard')):
    print(heading)
    print('\t[avg, std]:', FewShot_Results[voting])

Soft results:  [0.7737272155876807, 0.7248280175109443, 0.5488721804511277, 0.7493734335839599, 0.6228786926461345]

Hard results:  [0.7474747474747475, 0.7248280175109443, 0.6, 0.7493734335839599, 0.5989974937343359]


Soft statistics: 
	[avg, std]: [0.6839359079559694, 0.08483738249059343]

Hard statistics: 
	[avg, std]: [0.6841347384607974, 0.06964336536755464]


## Training and Testing

In [11]:
from tools.DataLoaders import BasePAN, DatasetPAN

# Build the held-out test split here. No earlier cell creates `baseTest`
# (its only construction sits inside a string literal), so without this
# the calls below raise NameError on a fresh kernel.
baseTest = BasePAN(Dir        = 'data/' + _DATASET_,
                   split      = 'test',
                   language   = _LANGUAGE_,
                   label_idx  = label_idx,
                   class_dict = class_dict,
                   label_name = label_name)

# Same loading parameters as the training split.
baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

# Dataset wrapper handed to trainer.predict in the test loop.
Test = DatasetPAN(baseTest, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 4000



In [12]:
from transformers import TrainingArguments

# Same TrainingArguments setup as in the cross-validation section, repeated here.
# Logging interval: the number of optimisation steps needed to consume
# `samples` instances at the configured batch size.
# NOTE(review): 2 and 100 look like "number of classes" and "tweets per author" — confirm.
samples = 2 * _NUM_AUTHORS_ * (100 // _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = samples // _BATCH_SIZE_

training_args = TrainingArguments(
    # where checkpoints go
    output_dir                  = _OUTPUT_DIR_,
    overwrite_output_dir        = True,
    save_total_limit            = 10,
    # optimisation schedule
    num_train_epochs            = _EPOCHS_,
    learning_rate               = _LEARNING_RATE_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    # reporting / data handling
    logging_steps               = _LOGGING_STEPS_,
    remove_unused_columns       = False,
)

In [13]:
from transformers import AutoAdapterModel
from tools.DataLoaders import DatasetCrossVal
from transformers import AdapterTrainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle

# Test evaluation: for every cross-validation fold, train a fresh adapter +
# classification head on the fold's training authors, predict on the held-out
# test set, aggregate predictions per author, report metrics and save them.

# initialize base model (shared by all folds; adapters are added/removed per fold)

model = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)


# train

task = label_name

# Macro-F1 on the test set per fold, for the two author-level voting schemes.
f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):

    # loaders for current split ------------------------------------------

    authors_train, authors_val = crossVal_splits[split]

    Train = DatasetCrossVal(baseTrain, authors_train, task)
    # NOTE(review): Val is built but never used in this loop (predictions are
    # made on Test); kept in case DatasetCrossVal has side effects — confirm and drop.
    Val   = DatasetCrossVal(baseTrain, authors_val  , task)


    # add adapter --------------------------------------------------------

    # A new adapter and head are registered under the task name for every fold;
    # they are deleted again at the end of the iteration.
    model.add_adapter(adapter_name = task,config = _ADAPTER_CONFIG_)
    model.add_classification_head(head_name = task, num_labels = len(class_dict))

    model.set_active_adapters(task)
    # Per the adapter-transformers docs, train_adapter freezes the pre-trained
    # weights so that only the adapter (and head) are updated.
    model.train_adapter(task)


    # create trainer and train -------------------------------------------

    trainer = AdapterTrainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    # Overrides the trainer's private GPU count with the configured value.
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()


    # get predictions ----------------------------------------------------

    results            = trainer.predict(Test)
    # NOTE(review): the cross-validation loop passes the dataset wrapper (Val)
    # as first argument, while here the underlying baseTest is passed instead
    # of Test — confirm compute_author_predictions accepts both.
    author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))


    # report metrics -----------------------------------------------------

    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4),
               'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])


    # save predictions ----------------------------------------------------

    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/test_split_' + str(split + 1) + '/'
    # exist_ok avoids the check-then-create race of `if not exists: makedirs`.
    os.makedirs(DIR, exist_ok=True)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])


    # delete adapter -------------------------------------------------------

    # Remove this fold's adapter and head so the next fold can re-add them
    # under the same name.
    model.delete_adapter(task)
    model.delete_head(task)

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaAdapterModel: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
***** Running training *****
  Num examples = 2560
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps 

Step,Training Loss
80,0.626
160,0.4853
240,0.4313
320,0.4052
400,0.3766
480,0.3669
560,0.3362
640,0.3287
720,0.3162
800,0.3007


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/fakeNews/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 812.77it/s]
Adding adapter 'fakeNews'.
Adding head 'fakeNews' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 2560
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.6667    0.7000    0.6829       100
           1     0.6842    0.6500    0.6667       100

    accuracy                         0.6750       200
   macro avg     0.6754    0.6750    0.6748       200
weighted avg     0.6754    0.6750    0.6748       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6699    0.6900    0.6798       100
           1     0.6804    0.6600    0.6701       100

    accuracy                         0.6750       200
   macro avg     0.6752    0.6750    0.6749       200
weighted avg     0.6752    0.6750    0.6749       200



Step,Training Loss
80,0.6305
160,0.5122
240,0.4695
320,0.4243
400,0.3916
480,0.3673
560,0.3597
640,0.3379
720,0.3283
800,0.3229


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/fakeNews/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 964.92it/s]
Adding adapter 'fakeNews'.
Adding head 'fakeNews' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 2560
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.6771    0.6500    0.6633       100
           1     0.6635    0.6900    0.6765       100

    accuracy                         0.6700       200
   macro avg     0.6703    0.6700    0.6699       200
weighted avg     0.6703    0.6700    0.6699       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6633    0.6500    0.6566       100
           1     0.6569    0.6700    0.6634       100

    accuracy                         0.6600       200
   macro avg     0.6601    0.6600    0.6600       200
weighted avg     0.6601    0.6600    0.6600       200



Step,Training Loss
80,0.6061
160,0.4805
240,0.4358
320,0.4149
400,0.3869
480,0.3615
560,0.3419
640,0.3288
720,0.3154
800,0.3118


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/fakeNews/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 875.69it/s]
Adding adapter 'fakeNews'.
Adding head 'fakeNews' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 2560
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.7093    0.6100    0.6559       100
           1     0.6579    0.7500    0.7009       100

    accuracy                         0.6800       200
   macro avg     0.6836    0.6800    0.6784       200
weighted avg     0.6836    0.6800    0.6784       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6818    0.6000    0.6383       100
           1     0.6429    0.7200    0.6792       100

    accuracy                         0.6600       200
   macro avg     0.6623    0.6600    0.6588       200
weighted avg     0.6623    0.6600    0.6588       200



Step,Training Loss
80,0.6247
160,0.5057
240,0.4513
320,0.4232
400,0.371
480,0.3497
560,0.3192
640,0.3051
720,0.2905
800,0.2848


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/fakeNews/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 1049.86it/s]
Adding adapter 'fakeNews'.
Adding head 'fakeNews' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
***** Running training *****
  Num examples = 2560
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.6783    0.7800    0.7256       100
           1     0.7412    0.6300    0.6811       100

    accuracy                         0.7050       200
   macro avg     0.7097    0.7050    0.7033       200
weighted avg     0.7097    0.7050    0.7033       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.6612    0.8000    0.7240       100
           1     0.7468    0.5900    0.6592       100

    accuracy                         0.6950       200
   macro avg     0.7040    0.6950    0.6916       200
weighted avg     0.7040    0.6950    0.6916       200



Step,Training Loss
80,0.634
160,0.5078
240,0.4476
320,0.4119
400,0.3851
480,0.3643
560,0.349
640,0.3297
720,0.3233
800,0.319


Saving model checkpoint to checkPoints/checkpoint-500
Configuration saved in checkPoints/checkpoint-500/fakeNews/adapter_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_adapter.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin
Configuration saved in checkPoints/checkpoint-500/fakeNews/head_config.json
Module weights saved in checkPoints/checkpoint-500/fakeNews/pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 867.79it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.7632    0.5800    0.6591       100
           1     0.6613    0.8200    0.7321       100

    accuracy                         0.7000       200
   macro avg     0.7122    0.7000    0.6956       200
weighted avg     0.7122    0.7000    0.6956       200
 

hard voting:
               precision    recall  f1-score   support

           0     0.7532    0.5800    0.6554       100
           1     0.6585    0.8100    0.7265       100

    accuracy                         0.6950       200
   macro avg     0.7059    0.6950    0.6909       200
weighted avg     0.7059    0.6950    0.6909       200






In [14]:
# report statistics
#
# Summarises the per-split F1 scores gathered by the cross-validation loop
# (f1s_soft / f1s_hard are filled by an earlier cell) as [mean, std] pairs and
# stores them in FewShot_Results under the keys 'soft' and 'hard'.

# Convert before printing. The names are rebound to arrays (as before), so on a
# second execution of this cell they are already ndarrays; printing the raw
# object would then show a different repr than on the first run.
f1s_soft = np.array(f1s_soft)
f1s_hard = np.array(f1s_hard)

# .tolist() yields plain Python floats, so the printed line is the same list of
# numbers no matter how many times the cell is re-run.
print('Soft results: ', f1s_soft.tolist())
print('\nHard results: ', f1s_hard.tolist())

# ndarray.std() is called with its defaults here (NumPy's default is ddof=0).
FewShot_Results = {
    'soft': [f1s_soft.mean(), f1s_soft.std()],
    'hard': [f1s_hard.mean(), f1s_hard.std()],
}

print('\n\nSoft statistics: ')
print('\t[avg, std]:', FewShot_Results['soft'])

print('\nHard statistics: ')
print('\t[avg, std]:', FewShot_Results['hard'])

Soft results:  [0.6747967479674797, 0.6698679471788715, 0.678424278966938, 0.7033312382149592, 0.6956168831168832]

Hard results:  [0.6749268585431722, 0.65996599659966, 0.6587715776796468, 0.6915998887737304, 0.6909123153707786]


Soft statistics: 
	[avg, std]: [0.6844074190890262, 0.012832164452313529]

Hard statistics: 
	[avg, std]: [0.6752353273933975, 0.014267932518559215]
