## Set Global Seed 

In [1]:
import os
import random
import numpy as np
import torch
import transformers

def set_all_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed = 260615
set_all_seeds(seed)

print("The global seed " + str(seed))

The global seed 260615


## Hyperparameters

In [2]:
# LANGUAGE

_LANGUAGE_         = 'en'
_DATASET_          = '2015'

In [3]:
# MODEL CLASSIFICATION

_PRETRAINED_LM_    = 'vinai/bertweet-base'
_PREPROCESS_TEXT_  = True
_TWEET_BATCH_SIZE_ = 5
_ADAPTER_CONFIG_   = transformers.ParallelConfig(reduction_factor = 256)
_MAX_SEQ_LEN_      = 128

In [4]:
# TRAIN

_OUTPUT_DIR_       = 'checkPointsFE'
_LOGGING_STEPS_    = 50
_NUM_AUTHORS_      = 16
_K_FOLD_CV_        = 5
_NO_GPUS_          = 1
_BATCH_SIZE_       = int(32 / _NO_GPUS_)
_EPOCHS_           = 10
_LEARNING_RATE_    = 1e-4

# PREDICTIONS

_PRED_DIR_         = 'FETestGeneralized'

## Other parameters

In [5]:
# LABEL DICTONARIES -----------------------------------------------------------------------

# 2015

age_dict  = {'18-24': 0, '25-34': 1, '35-49': 2, '50-XX': 3}
ageEN_hyp = {0: '18-24', 1: '25-34', 2: '35-49', 3: '50-XX'}
ageES_hyp = {0: 'La edad de esta persona es entre 18 y 24 años', 
             1: 'La edad de esta persona es entre 25 y 34 años', 
             2: 'La edad de esta persona es entre 35 y 49 años', 
             3: 'La edad de esta persona es más de 50 años'}

# 2017

gender_dict    = {'female': 0, 'male':   1}
varietyEN_dict = {'australia': 0, 'canada': 1, 'great britain': 2, 'ireland': 3, 'new zealand': 4, 'united states': 5}
varietyES_dict = {'argentina': 0, 'chile': 1, 'colombia': 2, 'mexico': 3, 'peru': 4, 'spain': 5, 'venezuela': 6}  

genderEN_hyp  = {0: 'I’m a female', 1: 'I’m a male'}
genderES_hyp  = {0: 'Mi nombre es María', 1: 'Mi nombre es José'}

# 2019

bots_dict  = {'human': 0, 'bot': 1}
botsEN_hyp = {0: 'This is a text from a person', 1: 'This is a text from a machine'}
botsES_hyp = {0: 'Humano', 1: 'Bot'}

# 2020 

fakeNews_dict  = {'0': 0, '1': 1}
fakeNewsEN_hyp = {0: 'This author is a normal user', 1: 'This author spreads fake news'}
fakeNewsES_hyp = {0: 'Este autor es un usuario normal', 1: 'Este autor publica noticias falsas'}

# 2021

hateSpeech_dict  = {'0': 0, '1': 1}
hateSpeechEN_hyp = {0: 'This text does not contain hate speech', 1: 'This text expresses prejudice and hate speech'}
hateSpeechES_hyp = {0: 'Este texto es moderado, respetuoso, cortés y civilizado', 1: 'Este texto expresa odio o prejuicios'}

In [6]:
# SET LANGUAGE DICTIONARIES --------------------------------------------------

if _LANGUAGE_ == 'en':
    age_hyp        = ageEN_hyp
    gender_hyp     = genderEN_hyp
    variety_dict   = varietyEN_dict
    fakeNews_hyp   = fakeNewsEN_hyp
    hateSpeech_hyp = hateSpeechEN_hyp
    bots_hyp       = botsEN_hyp 

elif _LANGUAGE_ == 'es':
    age_hyp        = ageES_hyp
    gender_hyp     = genderES_hyp
    variety_dict   = varietyES_dict
    fakeNews_hyp   = fakeNewsES_hyp
    hateSpeech_hyp = hateSpeechES_hyp
    bots_hyp       = botsES_hyp
    
    
# SET LANGUAGE AND DATASET PARAMETERS ----------------------------------------
    
if   _DATASET_ == '2015':
    label_idx  = 2
    class_dict = age_dict
    label_name = 'age'
    label_hyp  = age_hyp
    
elif _DATASET_ == '2017':
    label_idx  = 1
    class_dict = gender_dict
    label_name = 'gender'
    label_hyp  = gender_hyp
    
elif _DATASET_ == '2019':
    label_idx  = 1
    class_dict = bots_dict
    label_name = 'bots'
    label_hyp  = bots_hyp
    
elif _DATASET_ == '2020':
    label_idx  = 1
    class_dict = fakeNews_dict
    label_name = 'fakeNews'
    label_hyp  = fakeNews_hyp
    
elif _DATASET_ == '2021':
    label_idx  = 1
    class_dict = hateSpeech_dict
    label_name = 'hateSpeech'
    label_hyp  = hateSpeech_hyp
    

In [7]:
# SET LANGUAGE TOKENIZER

from transformers import AutoTokenizer, PretrainedConfig

tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
vocab = tokenizer.get_vocab()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Datasets

In [8]:
# GET AUTHORS AND LABELS -----------------------------------------------------

from tools.DataLoaders import BasePAN

baseTrain  = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'train',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)

baseTest   = BasePAN(Dir        = 'data/' + _DATASET_,
                     split      = 'test',
                     language   = _LANGUAGE_,
                     label_idx  = label_idx,
                     class_dict = class_dict,
                     label_name = label_name)


In [9]:
# GET K-FOLD SPLITS -----------------------------------------------------

crossVal_splits = baseTrain.cross_val(_K_FOLD_CV_, _NUM_AUTHORS_)

crossVal_splits[0][0]

['e78b60f0-b114-4238-a0b2-dbd52ab12b99',
 'eb5cea6d-ecc7-4c1c-b149-dffb4e4d9373',
 'c65228f6-59a4-41a9-8d06-788406e5ff7e',
 'a1e96b2b-17eb-4450-911b-751a31fadf15',
 'ce325322-8731-426d-837a-60ea688fe82d',
 '541812a8-23ca-40a2-ba5a-2ae0908eada2',
 '23842323-5699-4551-9ff6-a684dee93a9a',
 'f1dcc4ff-0c7c-4fee-9a65-04a13285f327',
 'c2253559-eb87-44ac-9a0b-a1d1a0cdfd48',
 'ae414abd-cd83-4f0d-bc8c-d0a34a0071a3',
 '522a6f2f-d308-4b0e-ab31-0ab817e1f5d6',
 'f3af2ca2-dbfb-4447-a4f2-dfdfdda58640',
 '18fc8412-8068-4993-9382-f44d716f092e',
 'b7e9f372-21a8-461b-b4f0-950205f84da0',
 'dde4cfc8-f4cf-47be-8d9c-21b3b7f63098',
 '59c19daa-873b-434d-98d9-3b8c0168f946',
 'a6323e6d-213e-4105-920c-d375023e5645',
 '3aae4b14-7dfe-4cb7-b82a-629d01d06da0',
 '6c51bbf8-f00e-4ddf-8ff5-cf034a6cfbd8',
 '6a3addbf-f699-482d-ba2f-02248d52db95',
 '16b423e5-3154-42ef-ad37-3f80858febf5',
 '1aa8c430-853b-4bbc-b784-df4c88264ccd',
 'c4a413a1-1170-4c2c-9f97-3f0bfb474154',
 'fde8eb00-0444-4159-9b65-1ead60c2dc88',
 'dd453533-94b0-

In [10]:
# GET TWEETS -----------------------------------------------------

baseTrain.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 2846



## Training and Testing

In [11]:
from tools.DataLoaders import DatasetPAN

baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

Test = DatasetPAN(baseTest, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 4000



In [12]:
from transformers import TrainingArguments

samples = 2 * _NUM_AUTHORS_ * int(100 / _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 10,
    overwrite_output_dir        = True,
    remove_unused_columns       = False,
)

In [13]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossVal
from transformers import Trainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle


# train

task = label_name

f1s_soft = []
f1s_hard = []

for split in range( _K_FOLD_CV_ ):
    
    # loaders for current split ------------------------------------------
    
    authors_train, authors_val = crossVal_splits[split]
    
    Train = DatasetCrossVal(baseTrain, authors_train, task)
    Val   = DatasetCrossVal(baseTrain, authors_val  , task)
    
    
    # initialize model ---------------------------------------------------
    
    model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_, num_labels = len(class_dict))
    
    for name, param in model.named_parameters():
        if 'classifier' in name:
            param.requires_grad = True
        else:
            param.requires_grad = False
    
    # create trainer and train -------------------------------------------
        
    trainer = Trainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()
    
    
    # get predictions ----------------------------------------------------
    
    results            = trainer.predict(Test)
    author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))
    
    
    # report metrics -----------------------------------------------------
    
    report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
               'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])
     
    
    # save predictions ----------------------------------------------------
    
    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/test_split_' + str(split + 1) + '/'
    if not os.path.exists(DIR):
        os.makedirs(DIR)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])
    


Some weights of the model checkpoint at pysentimiento/robertuito-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at pysentimiento/robertuito-base-uncased and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.we

Step,Training Loss
20,0.5924
40,0.4851
60,0.4567
80,0.4651
100,0.4173
120,0.4415
140,0.4058
160,0.4125
180,0.4138
200,0.386




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1186.18it/s]


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.8462    0.6600    0.7416        50
           1     0.7213    0.8800    0.7928        50

    accuracy                         0.7700       100
   macro avg     0.7837    0.7700    0.7672       100
weighted avg     0.7837    0.7700    0.7672       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.8250    0.6600    0.7333        50
           1     0.7167    0.8600    0.7818        50

    accuracy                         0.7600       100
   macro avg     0.7708    0.7600    0.7576       100
weighted avg     0.7708    0.7600    0.7576       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.6526
40,0.5763
60,0.533
80,0.4876
100,0.47
120,0.4687
140,0.4465
160,0.4251
180,0.4197
200,0.4559




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|██████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 713.48it/s]


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.7000    0.5600    0.6222        50
           1     0.6333    0.7600    0.6909        50

    accuracy                         0.6600       100
   macro avg     0.6667    0.6600    0.6566       100
weighted avg     0.6667    0.6600    0.6566       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.7073    0.5800    0.6374        50
           1     0.6441    0.7600    0.6972        50

    accuracy                         0.6700       100
   macro avg     0.6757    0.6700    0.6673       100
weighted avg     0.6757    0.6700    0.6673       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.5914
40,0.4389
60,0.3693
80,0.3411
100,0.3164
120,0.2965
140,0.271
160,0.2804
180,0.2479
200,0.2936




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1230.17it/s]


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.8788    0.5800    0.6988        50
           1     0.6866    0.9200    0.7863        50

    accuracy                         0.7500       100
   macro avg     0.7827    0.7500    0.7426       100
weighted avg     0.7827    0.7500    0.7426       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.9091    0.6000    0.7229        50
           1     0.7015    0.9400    0.8034        50

    accuracy                         0.7700       100
   macro avg     0.8053    0.7700    0.7632       100
weighted avg     0.8053    0.7700    0.7632       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.5892
40,0.4979
60,0.4304
80,0.4084
100,0.4097
120,0.3795
140,0.3678
160,0.3558
180,0.3462
200,0.3428




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1196.40it/s]


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.8919    0.6600    0.7586        50
           1     0.7302    0.9200    0.8142        50

    accuracy                         0.7900       100
   macro avg     0.8110    0.7900    0.7864       100
weighted avg     0.8110    0.7900    0.7864       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.8947    0.6800    0.7727        50
           1     0.7419    0.9200    0.8214        50

    accuracy                         0.8000       100
   macro avg     0.8183    0.8000    0.7971       100
weighted avg     0.8183    0.8000    0.7971       100



loading configuration file https://huggingface.co/pysentimiento/robertuito-base-uncased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/5212cb9b5b32726fce956daa9a21ee0a0c2b6e54c54d1af58c678217d85f8143.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-uncased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "

Step,Training Loss
20,0.6581
40,0.5568
60,0.4907
80,0.4867
100,0.4423
120,0.4489
140,0.4338
160,0.4166
180,0.4081
200,0.408




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 4000
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1242.62it/s]

Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.8043    0.7400    0.7708        50
           1     0.7593    0.8200    0.7885        50

    accuracy                         0.7800       100
   macro avg     0.7818    0.7800    0.7796       100
weighted avg     0.7818    0.7800    0.7796       100
 

hard voting:
               precision    recall  f1-score   support

           0     0.8182    0.7200    0.7660        50
           1     0.7500    0.8400    0.7925        50

    accuracy                         0.7800       100
   macro avg     0.7841    0.7800    0.7792       100
weighted avg     0.7841    0.7800    0.7792       100






In [14]:
# report statistics

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

f1s_soft = np.array(f1s_soft)
f1s_hard = np.array(f1s_hard)

FewShot_Results = {'soft': [f1s_soft.mean(), f1s_soft.std()], 'hard': [f1s_hard.mean(), f1s_hard.std()]}

print('\n\nSoft statistics: ')
print('\t[avg, std]:', FewShot_Results['soft'])

print('\nHard statistics: ')
print('\t[avg, std]:', FewShot_Results['hard'])

Soft results:  [0.767182913250329, 0.6565656565656566, 0.7425599835238389, 0.7863899908452854, 0.7796474358974359]

Hard results:  [0.7575757575757576, 0.667305171892328, 0.7631551848419318, 0.797077922077922, 0.779205138498595]


Soft statistics: 
	[avg, std]: [0.7464691960165093, 0.04737394197468491]

Hard statistics: 
	[avg, std]: [0.7528638349773068, 0.04493806558996559]


## Training with all

In [11]:
from tools.DataLoaders import DatasetPAN

baseTest.get_all_data(_TWEET_BATCH_SIZE_, tokenizer, _MAX_SEQ_LEN_, _PREPROCESS_TEXT_)

Test  = DatasetPAN(baseTest , label_name)
Train = DatasetPAN(baseTrain, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 2645



In [12]:
from transformers import TrainingArguments

samples = 4 * _NUM_AUTHORS_ * int(100 / _TWEET_BATCH_SIZE_)
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    learning_rate               = _LEARNING_RATE_,
    num_train_epochs            = _EPOCHS_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    output_dir                  = _OUTPUT_DIR_,
    save_total_limit            = 5,
    overwrite_output_dir        = True,
    remove_unused_columns       = False,
)

In [13]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossVal
from transformers import Trainer
from tools.Testing import compute_author_predictions
from sklearn.metrics import f1_score, classification_report
import pickle


# train

task = label_name

# add adapter --------------------------------------------------------

model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_, num_labels = len(class_dict))

for name, param in model.named_parameters():
    if 'classifier' in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

# create trainer and train -------------------------------------------

trainer = Trainer(
    model           = model,
    args            = training_args,
    train_dataset   = Train,
)
trainer.args._n_gpu = _NO_GPUS_

trainer.train()


# get predictions ----------------------------------------------------

results            = trainer.predict(Test)
author_predictions = compute_author_predictions(baseTest, results.predictions, task, len(class_dict))


# report metrics -----------------------------------------------------

report = {'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4), 
           'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4)}

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

Step,Training Loss
40,1.179
80,1.0733
120,1.0079
160,0.9139
200,0.9031
240,0.8908
280,0.8392
320,0.7819
360,0.8302
400,0.784


Saving model checkpoint to checkPointsFE/checkpoint-500
Configuration saved in checkPointsFE/checkpoint-500/config.json
Model weights saved in checkPointsFE/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 2645
  Batch size = 200


100%|█████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 1986.07it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:

print("Results:\n")
print("soft voting:\n", report['soft'], '\n')
print("hard voting:\n", report['hard'])

Results:

soft voting:
               precision    recall  f1-score   support

           0     0.8793    0.9107    0.8947        56
           1     0.6543    0.9138    0.7626        58
           2     1.0000    0.1000    0.1818        20
           3     1.0000    0.1250    0.2222         8

    accuracy                         0.7535       142
   macro avg     0.8834    0.5124    0.5153       142
weighted avg     0.8112    0.7535    0.7025       142
 

hard voting:
               precision    recall  f1-score   support

           0     0.8772    0.8929    0.8850        56
           1     0.6386    0.9138    0.7518        58
           2     1.0000    0.1000    0.1818        20
           3     0.0000    0.0000    0.0000         8

    accuracy                         0.7394       142
   macro avg     0.6289    0.4767    0.4546       142
weighted avg     0.7476    0.7394    0.6817       142

