## Set Global Seed 

In [1]:
import os
import random
import numpy as np
import torch
import transformers

def set_all_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed = 260615
set_all_seeds(seed)

print("The global seed " + str(seed))

The global seed 260615


## Hyperparameters

In [2]:
# LANGUAGE

_LANGUAGE_         = 'en'
_DATASET_          = '2015'

In [3]:
# MODEL CLASSIFICATION

_PRETRAINED_LM_    = 'vinai/bertweet-base'
_PREPROCESS_TEXT_  = True
_TWEET_BATCH_SIZE_ = 5
_ADAPTER_CONFIG_   = transformers.ParallelConfig(reduction_factor = 256)
_MAX_SEQ_LEN_      = 128

In [4]:
# TRAIN

_OUTPUT_DIR_       = 'checkPointsFFT'
_LOGGING_STEPS_    = 50
_NUM_AUTHORS_      = 16
_K_FOLD_CV_        = 5
_NO_GPUS_          = 1
_BATCH_SIZE_       = int(32 / _NO_GPUS_)
_EPOCHS_           = 10
_LEARNING_RATE_    = 1e-5

# PREDICTIONS

_PRED_DIR_         = 'FFTCustomTrainer'

## Other parameters

In [5]:
# LABEL DICTONARIES -----------------------------------------------------------------------

# 2015

age_dict  = {'18-24': 0, '25-34': 1, '35-49': 2, '50-XX': 3}
ageEN_hyp = {0: '18-24', 1: '25-34', 2: '35-49', 3: '50-XX'}
ageES_hyp = {0: 'La edad de esta persona es entre 18 y 24 años', 
             1: 'La edad de esta persona es entre 25 y 34 años', 
             2: 'La edad de esta persona es entre 35 y 49 años', 
             3: 'La edad de esta persona es más de 50 años'}

# 2017

gender_dict    = {'female': 0, 'male':   1}
varietyEN_dict = {'australia': 0, 'canada': 1, 'great britain': 2, 'ireland': 3, 'new zealand': 4, 'united states': 5}
varietyES_dict = {'argentina': 0, 'chile': 1, 'colombia': 2, 'mexico': 3, 'peru': 4, 'spain': 5, 'venezuela': 6}  

genderEN_hyp  = {0: 'I’m a female', 1: 'I’m a male'}
genderES_hyp  = {0: 'Mi nombre es María', 1: 'Mi nombre es José'}

# 2019

bots_dict  = {'human': 0, 'bot': 1}
botsEN_hyp = {0: 'This is a text from a person', 1: 'This is a text from a machine'}
botsES_hyp = {0: 'Humano', 1: 'Bot'}

# 2020 

fakeNews_dict  = {'0': 0, '1': 1}
fakeNewsEN_hyp = {0: 'This author is a normal user', 1: 'This author spreads fake news'}
fakeNewsES_hyp = {0: 'Este autor es un usuario normal', 1: 'Este autor publica noticias falsas'}

# 2021

hateSpeech_dict  = {'0': 0, '1': 1}
hateSpeechEN_hyp = {0: 'This text does not contain hate speech', 1: 'This text expresses prejudice and hate speech'}
hateSpeechES_hyp = {0: 'Este texto es moderado, respetuoso, cortés y civilizado', 1: 'Este texto expresa odio o prejuicios'}

In [6]:
# SET LANGUAGE DICTIONARIES --------------------------------------------------

if _LANGUAGE_ == 'en':
    age_hyp        = ageEN_hyp
    gender_hyp     = genderEN_hyp
    variety_dict   = varietyEN_dict
    fakeNews_hyp   = fakeNewsEN_hyp
    hateSpeech_hyp = hateSpeechEN_hyp
    bots_hyp       = botsEN_hyp 

elif _LANGUAGE_ == 'es':
    age_hyp        = ageES_hyp
    gender_hyp     = genderES_hyp
    variety_dict   = varietyES_dict
    fakeNews_hyp   = fakeNewsES_hyp
    hateSpeech_hyp = hateSpeechES_hyp
    bots_hyp       = botsES_hyp
    
    
# SET LANGUAGE AND DATASET PARAMETERS ----------------------------------------
    
if   _DATASET_ == '2015':
    label_idx  = 2
    class_dict = age_dict
    label_name = 'age'
    label_hyp  = age_hyp
    
elif _DATASET_ == '2017':
    label_idx  = 1
    class_dict = gender_dict
    label_name = 'gender'
    label_hyp  = gender_hyp
    
elif _DATASET_ == '2019':
    label_idx  = 1
    class_dict = bots_dict
    label_name = 'bots'
    label_hyp  = bots_hyp
    
elif _DATASET_ == '2020':
    label_idx  = 1
    class_dict = fakeNews_dict
    label_name = 'fakeNews'
    label_hyp  = fakeNews_hyp
    
elif _DATASET_ == '2021':
    label_idx  = 1
    class_dict = hateSpeech_dict
    label_name = 'hateSpeech'
    label_hyp  = hateSpeech_hyp
    

In [7]:
# SET LANGUAGE TOKENIZER

from transformers import AutoTokenizer, PretrainedConfig

tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
vocab = tokenizer.get_vocab()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Datasets

In [8]:
# GET AUTHORS AND LABELS -----------------------------------------------------

from tools.DataLoaders import BasePAN

baseTrain  = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'train',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)

baseTest   = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'test',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)


In [9]:
# GET K-FOLD SPLITS -----------------------------------------------------

crossVal_splits = baseTrain.cross_val(_K_FOLD_CV_, _NUM_AUTHORS_)

crossVal_splits[0][0]

['e78b60f0-b114-4238-a0b2-dbd52ab12b99',
 'eb5cea6d-ecc7-4c1c-b149-dffb4e4d9373',
 'c65228f6-59a4-41a9-8d06-788406e5ff7e',
 'a1e96b2b-17eb-4450-911b-751a31fadf15',
 'ce325322-8731-426d-837a-60ea688fe82d',
 '541812a8-23ca-40a2-ba5a-2ae0908eada2',
 '23842323-5699-4551-9ff6-a684dee93a9a',
 'f1dcc4ff-0c7c-4fee-9a65-04a13285f327',
 'c2253559-eb87-44ac-9a0b-a1d1a0cdfd48',
 'ae414abd-cd83-4f0d-bc8c-d0a34a0071a3',
 '522a6f2f-d308-4b0e-ab31-0ab817e1f5d6',
 'f3af2ca2-dbfb-4447-a4f2-dfdfdda58640',
 '18fc8412-8068-4993-9382-f44d716f092e',
 'b7e9f372-21a8-461b-b4f0-950205f84da0',
 'dde4cfc8-f4cf-47be-8d9c-21b3b7f63098',
 '59c19daa-873b-434d-98d9-3b8c0168f946',
 'a6323e6d-213e-4105-920c-d375023e5645',
 '3aae4b14-7dfe-4cb7-b82a-629d01d06da0',
 '6c51bbf8-f00e-4ddf-8ff5-cf034a6cfbd8',
 '6a3addbf-f699-482d-ba2f-02248d52db95',
 '16b423e5-3154-42ef-ad37-3f80858febf5',
 '1aa8c430-853b-4bbc-b784-df4c88264ccd',
 'c4a413a1-1170-4c2c-9f97-3f0bfb474154',
 'fde8eb00-0444-4159-9b65-1ead60c2dc88',
 'dd453533-94b0-

In [10]:
# GET TWEETS -----------------------------------------------------

baseTrain.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 2846



## Training and Testing

In [11]:
from tools.DataLoaders import DatasetPAN

baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

Test = DatasetPAN(baseTest, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 4000



In [12]:
from transformers import TrainingArguments

samples = 2 * _NUM_AUTHORS_ * int(100 / _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 10,
    overwrite_output_dir        = True,
    remove_unused_columns       = False,
)

In [13]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossVal
from transformers import Trainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle


# train

task = label_name

f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):
    
    # loaders for current split ------------------------------------------
    
    authors_train, authors_val = crossVal_splits[split]
    
    Train = DatasetCrossVal(baseTrain, authors_train, task)
    Val   = DatasetCrossVal(baseTrain, authors_val  , task)
    
    
    # initialize model ---------------------------------------------------
    
    model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_, num_labels = len(class_dict))
    
    
    # create trainer and train -------------------------------------------
        
    trainer = Trainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()
    
    
    # get predictions ----------------------------------------------------
    
    results            = trainer.predict(Test)
    author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))
    
    
    # report metrics -----------------------------------------------------
    
    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
               'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])
     
    
    # save predictions ----------------------------------------------------
    
    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/test_split_' + str(split + 1) + '/'
    if not os.path.exists(DIR):
        os.makedirs(DIR)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])
    


Some weights of the model checkpoint at pysentimiento/robertuito-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at pysentimiento/robertuito-base-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.den

Step,Training Loss
20,0.6383
40,0.5283
60,0.4685
80,0.4433
100,0.3857
120,0.3797
140,0.3336
160,0.3163
180,0.2865
200,0.2754




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1264.35it/s]


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.8649    0.6400    0.7356        50
           1     0.7143    0.9000    0.7965        50

    accuracy                         0.7700       100
   macro avg     0.7896    0.7700    0.7660       100
weighted avg     0.7896    0.7700    0.7660       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.8421    0.6400    0.7273        50
           1     0.7097    0.8800    0.7857        50

    accuracy                         0.7600       100
   macro avg     0.7759    0.7600    0.7565       100
weighted avg     0.7759    0.7600    0.7565       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.6897
40,0.6332
60,0.5706
80,0.5247
100,0.4706
120,0.4442
140,0.4024
160,0.3497
180,0.3058
200,0.3325




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1191.80it/s]


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.7174    0.6600    0.6875        50
           1     0.6852    0.7400    0.7115        50

    accuracy                         0.7000       100
   macro avg     0.7013    0.7000    0.6995       100
weighted avg     0.7013    0.7000    0.6995       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.7045    0.6200    0.6596        50
           1     0.6607    0.7400    0.6981        50

    accuracy                         0.6800       100
   macro avg     0.6826    0.6800    0.6788       100
weighted avg     0.6826    0.6800    0.6788       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.6437
40,0.5029
60,0.4023
80,0.3509
100,0.2881
120,0.2797
140,0.2274
160,0.2175
180,0.1684
200,0.1829




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1245.10it/s]


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.9091    0.6000    0.7229        50
           1     0.7015    0.9400    0.8034        50

    accuracy                         0.7700       100
   macro avg     0.8053    0.7700    0.7632       100
weighted avg     0.8053    0.7700    0.7632       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.8571    0.6000    0.7059        50
           1     0.6923    0.9000    0.7826        50

    accuracy                         0.7500       100
   macro avg     0.7747    0.7500    0.7442       100
weighted avg     0.7747    0.7500    0.7442       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.634
40,0.537
60,0.4365
80,0.4151
100,0.3836
120,0.3281
140,0.2833
160,0.2742
180,0.2337
200,0.2249




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 835.19it/s]


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.8889    0.6400    0.7442        50
           1     0.7188    0.9200    0.8070        50

    accuracy                         0.7800       100
   macro avg     0.8038    0.7800    0.7756       100
weighted avg     0.8038    0.7800    0.7756       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.8889    0.6400    0.7442        50
           1     0.7188    0.9200    0.8070        50

    accuracy                         0.7800       100
   macro avg     0.8038    0.7800    0.7756       100
weighted avg     0.8038    0.7800    0.7756       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.6743
40,0.6062
60,0.5265
80,0.4949
100,0.4304
120,0.4149
140,0.3562
160,0.3297
180,0.2768
200,0.2896




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1210.98it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.7872    0.7400    0.7629        50
           1     0.7547    0.8000    0.7767        50

    accuracy                         0.7700       100
   macro avg     0.7710    0.7700    0.7698       100
weighted avg     0.7710    0.7700    0.7698       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.8085    0.7600    0.7835        50
           1     0.7736    0.8200    0.7961        50

    accuracy                         0.7900       100
   macro avg     0.7910    0.7900    0.7898       100
weighted avg     0.7910    0.7900    0.7898       100






In [14]:
# report statistics

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

f1s_soft = np.array(f1s_soft)
f1s_hard = np.array(f1s_hard)

FewShot_Results = {'soft': [f1s_soft.mean(), f1s_soft.std()], 'hard': [f1s_hard.mean(), f1s_hard.std()]}

print('\n\nSoft statistics: ')
print('\t[avg, std]:', FewShot_Results['soft'])

print('\nHard statistics: ')
print('\t[avg, std]:', FewShot_Results['hard'])

Soft results:  [0.7660461804495982, 0.6995192307692307, 0.7631551848419318, 0.7756017951856385, 0.769792813532179]

Hard results:  [0.7564935064935063, 0.6788438378161381, 0.7442455242966751, 0.7756017951856385, 0.7898108297467722]


Soft statistics: 
	[avg, std]: [0.7548230409557156, 0.027963353237124764]

Hard statistics: 
	[avg, std]: [0.7489990987077462, 0.03840266683137282]


## Training with all

In [11]:
from tools.DataLoaders import DatasetPAN

baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

Test  = DatasetPAN(baseTest , label_name)
Train = DatasetPAN(baseTrain, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 2645



In [12]:
from transformers import TrainingArguments

samples = 4 * _NUM_AUTHORS_ * int(100 / _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 5,
    overwrite_output_dir        = True,
    remove_unused_columns       = False,
)

In [13]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossVal
from transformers import Trainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle


# train

task = label_name

# add adapter --------------------------------------------------------

model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_, num_labels = len(class_dict))


# create trainer and train -------------------------------------------

trainer = Trainer(
    model           = model,
    args            = training_args,
    train_dataset   = Train,
)
trainer.args._n_gpu = _NO_GPUS_

trainer.train()


# get predictions ----------------------------------------------------

results            = trainer.predict(Test)
author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))


# report metrics -----------------------------------------------------

report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
           'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

Step,Training Loss
40,1.1969
80,0.9635
120,0.8552
160,0.7531
200,0.7424
240,0.6606
280,0.592
320,0.5144
360,0.5326
400,0.4433


Saving model checkpoint to checkPointsFFT/checkpoint-500
Configuration saved in checkPointsFFT/checkpoint-500/config.json
Model weights saved in checkPointsFFT/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 2083.94it/s]


In [14]:

print("Results:\n")
print("soft voting:\n", report['soft'], '\n')
print("hard voting:\n", report['hard'])

Results:

soft voting:
               precision    recall  f1-score   support

           0     0.8667    0.9286    0.8966        56
           1     0.7778    0.8448    0.8099        58
           2     0.7857    0.5500    0.6471        20
           3     1.0000    0.6250    0.7692         8

    accuracy                         0.8239       142
   macro avg     0.8575    0.7371    0.7807       142
weighted avg     0.8265    0.8239    0.8189       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.8387    0.9286    0.8814        56
           1     0.7705    0.8103    0.7899        58
           2     0.7857    0.5500    0.6471        20
           3     1.0000    0.6250    0.7692         8

    accuracy                         0.8099       142
   macro avg     0.8487    0.7285    0.7719       142
weighted avg     0.8125    0.8099    0.8047       142

