## Set Global Seed 

In [1]:
import os
import random
import numpy as np
import torch
import transformers

def set_all_seeds(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator plus every CUDA device), and configures cuDNN so that it
    selects deterministic algorithms.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed() only seeds the *current* GPU; manual_seed_all() covers every
    # device and is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs; keep it off
    # so the deterministic flag above is not undermined.
    torch.backends.cudnn.benchmark = False
    
# Single seed value for the whole notebook; it is applied once here and
# echoed so the run can be identified from the cell output.
seed = 260615
set_all_seeds(seed)

print("The global seed ", seed, sep="")

The global seed 260615


## Hyperparameters

In [2]:
# LANGUAGE

_LANGUAGE_         = 'en'
_DATASET_          = '2017'

In [3]:
# MODEL CLASSIFICATION

# Checkpoint identifier handed to the tokenizer, config and model loaders.
_PRETRAINED_LM_ = "ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli"

# Arguments forwarded to the data loader's get_all_data() call.
# NOTE(review): presumably tweets are grouped _TWEET_BATCH_SIZE_ at a time and
# truncated to _MAX_SEQ_LEN_ tokens -- confirm in tools.DataLoaders.
_PREPROCESS_TEXT_ = True
_TWEET_BATCH_SIZE_ = 5
_MAX_SEQ_LEN_ = 150

# NOTE(review): not referenced by the training cells visible in this notebook.
_ADAPTER_CONFIG_ = transformers.ParallelConfig(reduction_factor=256)

In [4]:
# TRAIN

_OUTPUT_DIR_       = 'checkPointsNLI'
_LOGGING_STEPS_    = 50
_NUM_AUTHORS_      = 256
_K_FOLD_CV_        = 5
_NO_GPUS_          = 1
_BATCH_SIZE_       = int(8 / _NO_GPUS_)
_EPOCHS_           = 10
_LEARNING_RATE_    = 1e-8

# PREDICTIONS

_PRED_DIR_         = 'NLITestGeneralized'

## Other parameters

In [5]:
# LABEL DICTIONARIES ----------------------------------------------------------------------
#
# Each *_dict maps a raw label string to its integer class id; each *_hyp maps
# a class id to the hypothesis sentence used for that class (EN / ES variants).

# 2017

gender_dict = dict(female=0, male=1)
varietyEN_dict = {
    country: idx
    for idx, country in enumerate(
        ['australia', 'canada', 'great britain', 'ireland', 'new zealand', 'united states']
    )
}
varietyES_dict = {
    country: idx
    for idx, country in enumerate(
        ['argentina', 'chile', 'colombia', 'mexico', 'peru', 'spain', 'venezuela']
    )
}

genderEN_hyp = dict(enumerate(['I’m a female', 'I’m a male']))
genderES_hyp = dict(enumerate(['Mi nombre es María', 'Mi nombre es José']))

# 2019

bots_dict = dict(human=0, bot=1)
botsEN_hyp = dict(enumerate(['This is a text from a person', 'This is a text from a machine']))
botsES_hyp = dict(enumerate(['Humano', 'Bot']))

# 2020

fakeNews_dict = {str(label): label for label in (0, 1)}
fakeNewsEN_hyp = dict(enumerate(['This author is a normal user', 'This author spreads fake news']))
fakeNewsES_hyp = dict(enumerate(['Este autor es un usuario normal', 'Este autor publica noticias falsas']))

# 2021

hateSpeech_dict = {str(label): label for label in (0, 1)}
hateSpeechEN_hyp = dict(enumerate([
    'This text does not contain hate speech',
    'This text expresses prejudice and hate speech',
]))
hateSpeechES_hyp = dict(enumerate([
    'Este texto es moderado, respetuoso, cortés y civilizado',
    'Este texto expresa odio o prejuicios',
]))

In [6]:
# SET LANGUAGE DICTIONARIES --------------------------------------------------
#
# Bind the language-neutral names (gender_hyp, variety_dict, ...) to the
# English or Spanish variants chosen by _LANGUAGE_.

if _LANGUAGE_ == 'en':
    gender_hyp     = genderEN_hyp
    variety_dict   = varietyEN_dict
    fakeNews_hyp   = fakeNewsEN_hyp
    hateSpeech_hyp = hateSpeechEN_hyp
    bots_hyp       = botsEN_hyp

elif _LANGUAGE_ == 'es':
    gender_hyp     = genderES_hyp
    variety_dict   = varietyES_dict
    fakeNews_hyp   = fakeNewsES_hyp
    hateSpeech_hyp = hateSpeechES_hyp
    bots_hyp       = botsES_hyp

else:
    # Fail here instead of with a NameError in a later cell.
    raise ValueError("Unsupported _LANGUAGE_ " + repr(_LANGUAGE_) + "; expected 'en' or 'es'")


# SET LANGUAGE AND DATASET PARAMETERS ----------------------------------------
#
# Pick the task for the chosen dataset edition:
#   label_idx  - forwarded to the data loader as ``label_idx``
#   class_dict - raw label string -> class id
#   label_name - task name (also forwarded to the data loader)
#   label_hyp  - class id -> hypothesis sentence in the selected language

if   _DATASET_ == '2017':
    label_idx  = 1
    class_dict = gender_dict
    label_name = 'gender'
    label_hyp  = gender_hyp

elif _DATASET_ == '2019':
    label_idx  = 1
    class_dict = bots_dict
    label_name = 'bots'
    label_hyp  = bots_hyp

elif _DATASET_ == '2020':
    label_idx  = 1
    class_dict = fakeNews_dict
    label_name = 'fakeNews'
    label_hyp  = fakeNews_hyp

elif _DATASET_ == '2021':
    label_idx  = 1
    class_dict = hateSpeech_dict
    label_name = 'hateSpeech'
    label_hyp  = hateSpeech_hyp

else:
    # Fail here instead of with a NameError in a later cell.
    raise ValueError("Unsupported _DATASET_ " + repr(_DATASET_) + "; expected '2017', '2019', '2020' or '2021'")

    

In [7]:
from transformers import AutoConfig, AutoTokenizer, PretrainedConfig

# Tokenizer matching the pretrained checkpoint, plus its vocabulary mapping.
tokenizer = AutoTokenizer.from_pretrained(_PRETRAINED_LM_)
vocab = tokenizer.get_vocab()

# AutoConfig instantiates the checkpoint's own configuration class. Loading
# through the bare PretrainedConfig base class makes transformers warn that a
# model of type "bart" is being used to instantiate a model of type "".
config             = AutoConfig.from_pretrained(_PRETRAINED_LM_)
# Label-name -> id mapping stored in the checkpoint's configuration.
nli_label2id       = config.label2id
# Used by the training loop to decide which prediction outputs to ignore.
is_encoder_decoder = config.is_encoder_decoder

You are using a model of type bart to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


## Datasets

In [8]:
# GET AUTHORS AND LABELS -----------------------------------------------------

from tools.DataLoaders import BasePAN

# Both loaders share every setting except the split they read.
pan_kwargs = dict(
    Dir        = 'data/' + _DATASET_,
    language   = _LANGUAGE_,
    label_idx  = label_idx,
    class_dict = class_dict,
    label_name = label_name,
)

baseTrain = BasePAN(split = 'train', **pan_kwargs)
baseTest  = BasePAN(split = 'test',  **pan_kwargs)

In [9]:
# GET K-FOLD SPLITS -----------------------------------------------------

# Ask the training loader for the cross-validation splits, passing the number
# of folds and the configured author count. Each entry is later unpacked by
# the training loop as (authors_train, authors_val).
# NOTE(review): how cross_val() uses _NUM_AUTHORS_ (per fold? per class?) is
# not visible here -- confirm in tools.DataLoaders.
crossVal_splits = baseTrain.cross_val(_K_FOLD_CV_, _NUM_AUTHORS_)

# Sanity check: display the first element of the first split.
crossVal_splits[0][0]

['5fb799c39e1c8a92f6c580ff516bdc',
 'c3fc612f9fe498210613a7991a1c7f2a',
 '7a2b6e351032615aa3c19d8c252bd552',
 '5849edbc23ee81353911f951e0903ea',
 '42dc691f860354271b77171b152967cf',
 '7ffdaa0d7d70f8c485e3f645aa9ba7d9',
 '49a6b94266d28dd3e241905d2300e504',
 'ad6faf0d1624a7015f0eab10a6516fc2',
 'c93fd1e3c9e5ba437a544db7b84e375e',
 '5fc0d0a6f90891a3cb4b32d7169e6676',
 '60705218ce29389bb7181837d701556e',
 '56b4ff0baba162a5c53323fe3dcd0d64',
 'a9430efa05e5abc56310ff9a5173cd07',
 '87318fe9b132b934dd298c3f7f3fbcf6',
 'd052413e23540da3e613f466e53a1317',
 '1342f1fee84567cda4741b41a770b4e0',
 '1de5e53b813163d9c5105cd15ccaa842',
 '4dbd49cb6fe542993bfd0b968be669bd',
 'e9cd1a184a4c34f480748df1bd3d2bf6',
 '505da57fd58bb00c6a164789c9658c58',
 'b123667f9d6697eb2828dc00920099fc',
 'b89bdf194952b1e65de3ba5d8cf35305',
 '47a9df75b78c187edebc2a740b118dc8',
 'ad1b99bd41ef75f70cd2e8406e254015',
 '31e8a4b69e210ed705c0901d41818ad',
 'f860e1654982c95af3c7da43c3d0ef94',
 '7fd945221f3a712a1223810e2b1ddc10',
 'b2e

In [10]:
# GET TWEETS -----------------------------------------------------

# Load the training split through the loader, passing the tweet batch size,
# tokenizer, maximum sequence length and preprocessing flag, and enabling the
# NLI mode with the per-class hypotheses and the checkpoint's NLI label ids.
baseTrain.get_all_data(
    _TWEET_BATCH_SIZE_,
    tokenizer,
    _MAX_SEQ_LEN_,
    _PREPROCESS_TEXT_,
    NLI          = True,
    label_hyp    = label_hyp,
    nli_label2id = nli_label2id,
)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 144000



## Training

In [11]:
from transformers import TrainingArguments

# Number of training examples used to derive the logging interval, so that the
# loss is logged once every `samples / _BATCH_SIZE_` optimisation steps.
# NOTE(review): the factors 4 and 100 are hard-coded; presumably 100 is the
# number of tweets per author -- confirm both against the data loader before
# changing _K_FOLD_CV_, the dataset or the task.
batches_per_author = 100 // _TWEET_BATCH_SIZE_
samples = 4 * _NUM_AUTHORS_ * batches_per_author
_LOGGING_STEPS_ = samples // _BATCH_SIZE_

# NOTE(review): TrainingArguments has its own `seed` argument, which is left at
# its default here -- confirm whether the notebook's global seed should be passed.
training_args = TrainingArguments(
    output_dir                  = _OUTPUT_DIR_,
    overwrite_output_dir        = True,
    save_total_limit            = 10,
    num_train_epochs            = _EPOCHS_,
    learning_rate               = _LEARNING_RATE_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    logging_steps               = _LOGGING_STEPS_,
    remove_unused_columns       = False,
)

In [12]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossValnli
from transformers import Trainer
from tools.Testing import compute_author_predictions_nli, compute_author_predictions_nli_LR
from sklearn.metrics import f1_score, classification_report
import gc
import pickle


def _voting_reports(predictions):
    """Return the 4-digit classification reports for soft and hard voting.

    ``predictions`` must provide the keys 'true', 'pred_soft' and 'pred_hard'.
    """
    return {
        vote: classification_report(predictions['true'], predictions['pred_' + vote], digits=4)
        for vote in ('soft', 'hard')
    }


def _macro_f1(predictions, vote):
    """Macro-averaged F1 of ``predictions['pred_<vote>']`` against ``predictions['true']``."""
    return f1_score(predictions['true'], predictions['pred_' + vote], average = 'macro')


def _print_reports(title, reports):
    """Print a title followed by the soft- and hard-voting reports."""
    print(title)
    print("soft voting:\n", reports['soft'], '\n')
    print("hard voting:\n", reports['hard'])


def _write_reports(path, reports):
    """Write the soft- and hard-voting reports to a text file at ``path``."""
    with open(path, 'w') as f:
        f.write("soft voting:\n" + reports['soft'] + '\n\n')
        f.write("hard voting:\n" + reports['hard'])


def _dump_pickle(path, obj):
    """Pickle ``obj`` to ``path``."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


# train

task = label_name

# Macro-F1 per fold, for direct voting and for the logistic-regression variant.
f1s_soft = []
f1s_hard = []

f1s_soft_LR = []
f1s_hard_LR = []

# Loop-invariant: for encoder-decoder checkpoints the encoder hidden state is
# excluded from the outputs gathered by trainer.predict().
ignore_keys = None
if is_encoder_decoder:
    ignore_keys = ['encoder_last_hidden_state']

for split in range( _K_FOLD_CV_ ):

    # release the previous fold ------------------------------------------
    # Drop the references to the previous fold's model/trainer *before* loading
    # a new model; otherwise both stay alive at the same time. The objects of
    # the last fold remain available after the loop.

    model = trainer = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


    # loaders for current split ------------------------------------------

    authors_train, authors_val = crossVal_splits[split]

    Train = DatasetCrossValnli(baseTrain, authors_train)
    Val   = DatasetCrossValnli(baseTrain, authors_val)


    # initialize model ---------------------------------------------------
    # Every fold starts again from the pretrained checkpoint.

    model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_)


    # create trainer and train -------------------------------------------

    trainer = Trainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()


    # get predictions ----------------------------------------------------

    results            = trainer.predict(Val , ignore_keys = ignore_keys)
    author_predictions = compute_author_predictions_nli(Val, results.predictions, task, len(class_dict), nli_label2id)

    # report metrics

    report = _voting_reports(author_predictions)

    f1s_soft.append( _macro_f1(author_predictions, 'soft') )
    f1s_hard.append( _macro_f1(author_predictions, 'hard') )

    _print_reports("Results with split " + str(split + 1) + ":\n", report)


    # get predictions with Logistic Regression----------------------------
    # The LR helper receives the model outputs on both the training and the
    # validation authors.

    resultsTrain = trainer.predict(Train, ignore_keys = ignore_keys)
    author_predictions_LR = compute_author_predictions_nli_LR(Train, Val, resultsTrain.predictions, results.predictions, task, len(class_dict))

    f1s_soft_LR.append( _macro_f1(author_predictions_LR, 'soft') )
    f1s_hard_LR.append( _macro_f1(author_predictions_LR, 'hard') )

    # report metrics

    report_LR = _voting_reports(author_predictions_LR)

    _print_reports("Results with split " + str(split + 1) + " using LOGISTIC REGRESSION:\n", report_LR)


    # save predictions ----------------------------------------------------

    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/split_' + str(split + 1) + '/'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(DIR, exist_ok=True)

    _dump_pickle(DIR + 'predictions.pickle', author_predictions)
    _dump_pickle(DIR + 'predictions_LR.pickle', author_predictions_LR)

    _write_reports(DIR + 'report.txt', report)
    _write_reports(DIR + 'report_LR.txt', report_LR)
    


***** Running training *****
  Num examples = 20480
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 25600


Step,Training Loss
2560,3.6536
5120,1.0004
7680,0.8321
10240,0.768
12800,0.7415
15360,0.723
17920,0.7175
20480,0.7116
23040,0.7049
25600,0.7105


Saving model checkpoint to checkPointsNLI/checkpoint-500
Configuration saved in checkPointsNLI/checkpoint-500/config.json
Model weights saved in checkPointsNLI/checkpoint-500/pytorch_model.bin
Saving model checkpoint to checkPointsNLI/checkpoint-1000
Configuration saved in checkPointsNLI/checkpoint-1000/config.json
Model weights saved in checkPointsNLI/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to checkPointsNLI/checkpoint-1500
Configuration saved in checkPointsNLI/checkpoint-1500/config.json
Model weights saved in checkPointsNLI/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to checkPointsNLI/checkpoint-2000
Configuration saved in checkPointsNLI/checkpoint-2000/config.json
Model weights saved in checkPointsNLI/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to checkPointsNLI/checkpoint-2500
Configuration saved in checkPointsNLI/checkpoint-2500/config.json
Model weights saved in checkPointsNLI/checkpoint-2500/pytorch_model.bin
Saving model checkpoint

Saving model checkpoint to checkPointsNLI/checkpoint-16500
Configuration saved in checkPointsNLI/checkpoint-16500/config.json
Model weights saved in checkPointsNLI/checkpoint-16500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-11500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-17000
Configuration saved in checkPointsNLI/checkpoint-17000/config.json
Model weights saved in checkPointsNLI/checkpoint-17000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-12000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-17500
Configuration saved in checkPointsNLI/checkpoint-17500/config.json
Model weights saved in checkPointsNLI/checkpoint-17500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-12500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-18000
Configuration saved in checkPointsNLI/checkpoint-18000/config.json
Model weig

acc: 0.8125: 100%|█████████████████████████████████████████████████████████| 720/720 [00:05<00:00, 138.46it/s]
***** Running Prediction *****
  Num examples = 20480
  Batch size = 200


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.7877    0.8556    0.8202       360
           1     0.8419    0.7694    0.8041       360

    accuracy                         0.8125       720
   macro avg     0.8148    0.8125    0.8122       720
weighted avg     0.8148    0.8125    0.8122       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.7708    0.8222    0.7957       360
           1     0.8095    0.7556    0.7816       360

    accuracy                         0.7889       720
   macro avg     0.7902    0.7889    0.7887       720
weighted avg     0.7902    0.7889    0.7887       720



acc: 0.8166666666666667: 100%|█████████████████████████████████████████████| 720/720 [00:05<00:00, 126.64it/s]


Results with split 1 using LOGISTIC REGRESSION:

soft voting:
               precision    recall  f1-score   support

           0     0.8220    0.8083    0.8151       360
           1     0.8115    0.8250    0.8182       360

    accuracy                         0.8167       720
   macro avg     0.8168    0.8167    0.8167       720
weighted avg     0.8168    0.8167    0.8167       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.8075    0.7806    0.7938       360
           1     0.7876    0.8139    0.8005       360

    accuracy                         0.7972       720
   macro avg     0.7976    0.7972    0.7972       720
weighted avg     0.7976    0.7972    0.7972       720



loading configuration file https://huggingface.co/ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/bf704e14bcd921d2d4cfcad78a3add263a85a5d067122102d3add0fb620085c7.88e321f78373dda73f5c421340751fd102e1cf513f3e985ac0ca9a0865c4e94a
Model config BartConfig {
  "_name_or_path": "ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForSequenceClassification"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop":

Step,Training Loss
2560,3.7611
5120,1.0263
7680,0.8453
10240,0.7855
12800,0.7563
15360,0.7357
17920,0.7269
20480,0.7218
23040,0.7178
25600,0.7164


Saving model checkpoint to checkPointsNLI/checkpoint-500
Configuration saved in checkPointsNLI/checkpoint-500/config.json
Model weights saved in checkPointsNLI/checkpoint-500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1000
Configuration saved in checkPointsNLI/checkpoint-1000/config.json
Model weights saved in checkPointsNLI/checkpoint-1000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1500
Configuration saved in checkPointsNLI/checkpoint-1500/config.json
Model weights saved in checkPointsNLI/checkpoint-1500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-22000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-2000
Configuration saved in checkPointsNLI/checkpoint-2000/config.json
Model weights saved in c

Saving model checkpoint to checkPointsNLI/checkpoint-15000
Configuration saved in checkPointsNLI/checkpoint-15000/config.json
Model weights saved in checkPointsNLI/checkpoint-15000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-15500
Configuration saved in checkPointsNLI/checkpoint-15500/config.json
Model weights saved in checkPointsNLI/checkpoint-15500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16000
Configuration saved in checkPointsNLI/checkpoint-16000/config.json
Model weights saved in checkPointsNLI/checkpoint-16000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-11000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16500
Configuration saved in checkPointsNLI/checkpoint-16500/config.json
Model weig

acc: 0.7666666666666667: 100%|█████████████████████████████████████████████| 720/720 [00:05<00:00, 128.62it/s]
***** Running Prediction *****
  Num examples = 20480
  Batch size = 200


Results with split 2:

soft voting:
               precision    recall  f1-score   support

           0     0.7567    0.7861    0.7711       360
           1     0.7775    0.7472    0.7620       360

    accuracy                         0.7667       720
   macro avg     0.7671    0.7667    0.7666       720
weighted avg     0.7671    0.7667    0.7666       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.7778    0.7778    0.7778       360
           1     0.7778    0.7778    0.7778       360

    accuracy                         0.7778       720
   macro avg     0.7778    0.7778    0.7778       720
weighted avg     0.7778    0.7778    0.7778       720



acc: 0.7638888888888888: 100%|█████████████████████████████████████████████| 720/720 [00:05<00:00, 126.91it/s]


Results with split 2 using LOGISTIC REGRESSION:

soft voting:
               precision    recall  f1-score   support

           0     0.7950    0.7111    0.7507       360
           1     0.7387    0.8167    0.7757       360

    accuracy                         0.7639       720
   macro avg     0.7669    0.7639    0.7632       720
weighted avg     0.7669    0.7639    0.7632       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.8000    0.7222    0.7591       360
           1     0.7468    0.8194    0.7815       360

    accuracy                         0.7708       720
   macro avg     0.7734    0.7708    0.7703       720
weighted avg     0.7734    0.7708    0.7703       720



loading configuration file https://huggingface.co/ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/bf704e14bcd921d2d4cfcad78a3add263a85a5d067122102d3add0fb620085c7.88e321f78373dda73f5c421340751fd102e1cf513f3e985ac0ca9a0865c4e94a
Model config BartConfig {
  "_name_or_path": "ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForSequenceClassification"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop":

Step,Training Loss
2560,3.6844
5120,0.9911
7680,0.8294
10240,0.7716
12800,0.7409
15360,0.7251
17920,0.7197
20480,0.7076
23040,0.7084
25600,0.7062


Saving model checkpoint to checkPointsNLI/checkpoint-500
Configuration saved in checkPointsNLI/checkpoint-500/config.json
Model weights saved in checkPointsNLI/checkpoint-500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1000
Configuration saved in checkPointsNLI/checkpoint-1000/config.json
Model weights saved in checkPointsNLI/checkpoint-1000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1500
Configuration saved in checkPointsNLI/checkpoint-1500/config.json
Model weights saved in checkPointsNLI/checkpoint-1500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-22000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-2000
Configuration saved in checkPointsNLI/checkpoint-2000/config.json
Model weights saved in c

Saving model checkpoint to checkPointsNLI/checkpoint-15000
Configuration saved in checkPointsNLI/checkpoint-15000/config.json
Model weights saved in checkPointsNLI/checkpoint-15000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-15500
Configuration saved in checkPointsNLI/checkpoint-15500/config.json
Model weights saved in checkPointsNLI/checkpoint-15500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16000
Configuration saved in checkPointsNLI/checkpoint-16000/config.json
Model weights saved in checkPointsNLI/checkpoint-16000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-11000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16500
Configuration saved in checkPointsNLI/checkpoint-16500/config.json
Model weig

acc: 0.7958333333333333: 100%|█████████████████████████████████████████████| 720/720 [00:04<00:00, 162.45it/s]
***** Running Prediction *****
  Num examples = 20480
  Batch size = 200


Results with split 3:

soft voting:
               precision    recall  f1-score   support

           0     0.7840    0.8167    0.8000       360
           1     0.8087    0.7750    0.7915       360

    accuracy                         0.7958       720
   macro avg     0.7963    0.7958    0.7957       720
weighted avg     0.7963    0.7958    0.7957       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.7627    0.7944    0.7782       360
           1     0.7855    0.7528    0.7688       360

    accuracy                         0.7736       720
   macro avg     0.7741    0.7736    0.7735       720
weighted avg     0.7741    0.7736    0.7735       720



acc: 0.7972222222222223: 100%|█████████████████████████████████████████████| 720/720 [00:03<00:00, 180.32it/s]


Results with split 3 using LOGISTIC REGRESSION:

soft voting:
               precision    recall  f1-score   support

           0     0.8204    0.7611    0.7896       360
           1     0.7772    0.8333    0.8043       360

    accuracy                         0.7972       720
   macro avg     0.7988    0.7972    0.7970       720
weighted avg     0.7988    0.7972    0.7970       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.8127    0.7472    0.7786       360
           1     0.7661    0.8278    0.7957       360

    accuracy                         0.7875       720
   macro avg     0.7894    0.7875    0.7872       720
weighted avg     0.7894    0.7875    0.7872       720



loading configuration file https://huggingface.co/ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/bf704e14bcd921d2d4cfcad78a3add263a85a5d067122102d3add0fb620085c7.88e321f78373dda73f5c421340751fd102e1cf513f3e985ac0ca9a0865c4e94a
Model config BartConfig {
  "_name_or_path": "ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForSequenceClassification"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop":

Step,Training Loss
2560,3.6924
5120,1.0089
7680,0.8367
10240,0.7783
12800,0.7486
15360,0.7307
17920,0.7173
20480,0.712
23040,0.7122
25600,0.7085


Saving model checkpoint to checkPointsNLI/checkpoint-500
Configuration saved in checkPointsNLI/checkpoint-500/config.json
Model weights saved in checkPointsNLI/checkpoint-500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1000
Configuration saved in checkPointsNLI/checkpoint-1000/config.json
Model weights saved in checkPointsNLI/checkpoint-1000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1500
Configuration saved in checkPointsNLI/checkpoint-1500/config.json
Model weights saved in checkPointsNLI/checkpoint-1500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-22000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-2000
Configuration saved in checkPointsNLI/checkpoint-2000/config.json
Model weights saved in c

Saving model checkpoint to checkPointsNLI/checkpoint-15000
Configuration saved in checkPointsNLI/checkpoint-15000/config.json
Model weights saved in checkPointsNLI/checkpoint-15000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-15500
Configuration saved in checkPointsNLI/checkpoint-15500/config.json
Model weights saved in checkPointsNLI/checkpoint-15500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16000
Configuration saved in checkPointsNLI/checkpoint-16000/config.json
Model weights saved in checkPointsNLI/checkpoint-16000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-11000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16500
Configuration saved in checkPointsNLI/checkpoint-16500/config.json
Model weig

acc: 0.7888888888888889: 100%|█████████████████████████████████████████████| 720/720 [00:04<00:00, 169.30it/s]
***** Running Prediction *****
  Num examples = 20480
  Batch size = 200


Results with split 4:

soft voting:
               precision    recall  f1-score   support

           0     0.7640    0.8361    0.7984       360
           1     0.8190    0.7417    0.7784       360

    accuracy                         0.7889       720
   macro avg     0.7915    0.7889    0.7884       720
weighted avg     0.7915    0.7889    0.7884       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.7368    0.7778    0.7568       360
           1     0.7647    0.7222    0.7429       360

    accuracy                         0.7500       720
   macro avg     0.7508    0.7500    0.7498       720
weighted avg     0.7508    0.7500    0.7498       720



acc: 0.7888888888888889: 100%|█████████████████████████████████████████████| 720/720 [00:03<00:00, 180.00it/s]


Results with split 4 using LOGISTIC REGRESSION:

soft voting:
               precision    recall  f1-score   support

           0     0.7873    0.7917    0.7895       360
           1     0.7905    0.7861    0.7883       360

    accuracy                         0.7889       720
   macro avg     0.7889    0.7889    0.7889       720
weighted avg     0.7889    0.7889    0.7889       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.7596    0.7722    0.7658       360
           1     0.7684    0.7556    0.7619       360

    accuracy                         0.7639       720
   macro avg     0.7640    0.7639    0.7639       720
weighted avg     0.7640    0.7639    0.7639       720



loading configuration file https://huggingface.co/ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/bf704e14bcd921d2d4cfcad78a3add263a85a5d067122102d3add0fb620085c7.88e321f78373dda73f5c421340751fd102e1cf513f3e985ac0ca9a0865c4e94a
Model config BartConfig {
  "_name_or_path": "ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForSequenceClassification"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop":

Step,Training Loss
2560,3.6398
5120,1.0032
7680,0.8298
10240,0.768
12800,0.7369
15360,0.7237
17920,0.7153
20480,0.7044
23040,0.7072
25600,0.7038


Saving model checkpoint to checkPointsNLI/checkpoint-500
Configuration saved in checkPointsNLI/checkpoint-500/config.json
Model weights saved in checkPointsNLI/checkpoint-500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1000
Configuration saved in checkPointsNLI/checkpoint-1000/config.json
Model weights saved in checkPointsNLI/checkpoint-1000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-21500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-1500
Configuration saved in checkPointsNLI/checkpoint-1500/config.json
Model weights saved in checkPointsNLI/checkpoint-1500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-22000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-2000
Configuration saved in checkPointsNLI/checkpoint-2000/config.json
Model weights saved in c

Saving model checkpoint to checkPointsNLI/checkpoint-15000
Configuration saved in checkPointsNLI/checkpoint-15000/config.json
Model weights saved in checkPointsNLI/checkpoint-15000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-15500
Configuration saved in checkPointsNLI/checkpoint-15500/config.json
Model weights saved in checkPointsNLI/checkpoint-15500/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-10500] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16000
Configuration saved in checkPointsNLI/checkpoint-16000/config.json
Model weights saved in checkPointsNLI/checkpoint-16000/pytorch_model.bin
Deleting older checkpoint [checkPointsNLI/checkpoint-11000] due to args.save_total_limit
Saving model checkpoint to checkPointsNLI/checkpoint-16500
Configuration saved in checkPointsNLI/checkpoint-16500/config.json
Model weig

acc: 0.7875: 100%|█████████████████████████████████████████████████████████| 720/720 [00:04<00:00, 174.33it/s]
***** Running Prediction *****
  Num examples = 20480
  Batch size = 200


Results with split 5:

soft voting:
               precision    recall  f1-score   support

           0     0.7820    0.7972    0.7895       360
           1     0.7932    0.7778    0.7854       360

    accuracy                         0.7875       720
   macro avg     0.7876    0.7875    0.7875       720
weighted avg     0.7876    0.7875    0.7875       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.7971    0.7639    0.7801       360
           1     0.7733    0.8056    0.7891       360

    accuracy                         0.7847       720
   macro avg     0.7852    0.7847    0.7846       720
weighted avg     0.7852    0.7847    0.7846       720



acc: 0.7805555555555556: 100%|█████████████████████████████████████████████| 720/720 [00:04<00:00, 175.82it/s]


Results with split 5 using LOGISTIC REGRESSION:

soft voting:
               precision    recall  f1-score   support

           0     0.8061    0.7389    0.7710       360
           1     0.7590    0.8222    0.7893       360

    accuracy                         0.7806       720
   macro avg     0.7825    0.7806    0.7802       720
weighted avg     0.7825    0.7806    0.7802       720
 

hard voting:
               precision    recall  f1-score   support

           0     0.8131    0.7250    0.7665       360
           1     0.7519    0.8333    0.7905       360

    accuracy                         0.7792       720
   macro avg     0.7825    0.7792    0.7785       720
weighted avg     0.7825    0.7792    0.7785       720



In [13]:
# Report statistics: show the per-split macro-F1 scores collected by the
# training loop, then their mean and standard deviation per voting scheme.

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

# Rebind the score lists as NumPy arrays (gives access to .mean() / .std()).
f1s_soft, f1s_hard = np.array(f1s_soft), np.array(f1s_hard)

# voting scheme -> [average, standard deviation] over the splits
FewShot_Results = {
    scheme: [scores.mean(), scores.std()]
    for scheme, scores in (('soft', f1s_soft), ('hard', f1s_hard))
}

for _heading, _scheme in (('\n\nSoft statistics: ', 'soft'), ('\nHard statistics: ', 'hard')):
    print(_heading)
    print('\t[avg, std]:', FewShot_Results[_scheme])

Soft results:  [0.8121517705468665, 0.7665784131345956, 0.7957446808510638, 0.7884170720201684, 0.7874799122602252]

Hard results:  [0.7886540600667409, 0.7777777777777778, 0.7735128093790709, 0.7498069498069497, 0.7846287451150673]


Soft statistics: 
	[avg, std]: [0.7900743697625838, 0.014704433866238077]

Hard statistics: 
	[avg, std]: [0.7748760684291213, 0.013591812660429397]


In [14]:
# Report statistics (logistic-regression variant): show the per-split macro-F1
# scores, then their mean and standard deviation per voting scheme.

print('Soft results: ', f1s_soft_LR)
print('\nHard results: ', f1s_hard_LR)

# Rebind the score lists as NumPy arrays (gives access to .mean() / .std()).
f1s_soft_LR, f1s_hard_LR = np.array(f1s_soft_LR), np.array(f1s_hard_LR)

# voting scheme -> [average, standard deviation] over the splits
FewShot_Results_LR = {
    scheme: [scores.mean(), scores.std()]
    for scheme, scores in (('soft', f1s_soft_LR), ('hard', f1s_hard_LR))
}

for _heading, _scheme in (
    ('\n\nSoft statistics with LOGISTIC REGRESSION: ', 'soft'),
    ('\nHard statistics with LOGISTIC REGRESSION: ', 'hard'),
):
    print(_heading)
    print('\t[avg, std]:', FewShot_Results_LR[_scheme])

Soft results:  [0.8166539343009932, 0.7632293657487292, 0.7969574522332362, 0.7888872599325611, 0.7801739130434782]

Hard results:  [0.7971658794109475, 0.7702905206168125, 0.7871547012031479, 0.7638724911452184, 0.7785168288903206]


Soft statistics with LOGISTIC REGRESSION: 
	[avg, std]: [0.7891803850517995, 0.01771968704877703]

Hard statistics with LOGISTIC REGRESSION: 
	[avg, std]: [0.7794000842532893, 0.01183746593689673]


## Training and Testing

In [13]:
from tools.DataLoaders import DatasetPAN

# Load and prepare the test corpus in NLI form. `get_all_data` is called for
# its effect on `baseTest`; its return value is not used here.
baseTest.get_all_data(
    _TWEET_BATCH_SIZE_,
    tokenizer,
    _MAX_SEQ_LEN_,
    _PREPROCESS_TEXT_,
    NLI          = True,
    label_hyp    = label_hyp,
    nli_label2id = nli_label2id,
)

# Dataset wrapper over the prepared test data for the current task label.
Test = DatasetPAN(baseTest, label_name)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 105600



In [14]:
from transformers import TrainingArguments

# Size of the logging interval, in optimizer steps.
# NOTE(review): `samples` looks like the number of training instances per epoch
# (authors x tweet batches per author x 4) -- TODO confirm what the constants
# 100 and 4 stand for and consider moving them to the hyperparameter cells.
samples = int(100 / _TWEET_BATCH_SIZE_) * _NUM_AUTHORS_ * 4

# Overrides the value set in the hyperparameter section.
_LOGGING_STEPS_ = int(samples / _BATCH_SIZE_)

training_args = TrainingArguments(
    # where checkpoints go and how many are kept
    output_dir                  = _OUTPUT_DIR_,
    overwrite_output_dir        = True,
    save_total_limit            = 10,
    # optimisation schedule
    num_train_epochs            = _EPOCHS_,
    learning_rate               = _LEARNING_RATE_,
    per_device_train_batch_size = _BATCH_SIZE_,
    per_device_eval_batch_size  = 200,
    # logging / data handling
    logging_steps               = _LOGGING_STEPS_,
    remove_unused_columns       = False,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
from transformers import AutoModelForSequenceClassification
from tools.DataLoaders import DatasetCrossValnli
from transformers import Trainer
from tools.Testing import compute_author_predictions_nli, compute_author_predictions_nli_LR
from sklearn.metrics import f1_score, classification_report
import pickle


# Cross-validated training and evaluation on the test set.
#
# For every split in `crossVal_splits` this cell:
#   1. loads a fresh copy of the pretrained model and trains it on the split's
#      training authors,
#   2. predicts on `Test` and aggregates to author-level predictions
#      (soft and hard voting),
#   3. repeats the author-level aggregation with the logistic-regression
#      variant, which additionally receives the predictions on `Train`,
#   4. prints both classification reports and stores predictions and reports
#      under results/<dataset>/<language>/<pred dir>/<n>_authors/test_split_<k>/.
#
# The macro-F1 of every split is appended to f1s_soft / f1s_hard and
# f1s_soft_LR / f1s_hard_LR for the statistics cells that follow.

task = label_name

f1s_soft = []
f1s_hard = []

f1s_soft_LR = []
f1s_hard_LR = []

# Loop-invariant: output keys that `trainer.predict` should drop.
ignore_keys = ['encoder_last_hidden_state'] if is_encoder_decoder else None

for split in range( _K_FOLD_CV_ ):

    # loaders for current split ------------------------------------------

    authors_train, authors_val = crossVal_splits[split]

    Train = DatasetCrossValnli(baseTrain, authors_train)
    # NOTE(review): `Val` is built but never passed to the Trainer in this cell.
    Val   = DatasetCrossValnli(baseTrain, authors_val)


    # initialize model ---------------------------------------------------

    # A new model is loaded for every split so no weights carry over.
    model = AutoModelForSequenceClassification.from_pretrained(_PRETRAINED_LM_)


    # create trainer and train -------------------------------------------

    trainer = Trainer(
        model           = model,
        args            = training_args,
        train_dataset   = Train,
    )
    # NOTE(review): writes a private attribute of the training arguments,
    # presumably to pin the number of GPUs used -- confirm this is still needed.
    trainer.args._n_gpu = _NO_GPUS_

    trainer.train()


    # get predictions ----------------------------------------------------

    results            = trainer.predict(Test , ignore_keys = ignore_keys)
    author_predictions = compute_author_predictions_nli(baseTest, results.predictions, task, len(class_dict), nli_label2id)

    # report metrics

    report = {
        'soft': classification_report(author_predictions['true'], author_predictions['pred_soft'], digits=4),
        'hard': classification_report(author_predictions['true'], author_predictions['pred_hard'], digits=4),
    }

    f1s_soft.append( f1_score(author_predictions['true'], author_predictions['pred_soft'], average = 'macro') )
    f1s_hard.append( f1_score(author_predictions['true'], author_predictions['pred_hard'], average = 'macro') )

    print("Results with split " + str(split + 1) + ":\n")
    print("soft voting:\n", report['soft'], '\n')
    print("hard voting:\n", report['hard'])


    # get predictions with Logistic Regression----------------------------

    # The LR variant is given the model outputs on the training data as well
    # as on the test data.
    resultsTrain = trainer.predict(Train, ignore_keys = ignore_keys)
    author_predictions_LR = compute_author_predictions_nli_LR(Train, baseTest, resultsTrain.predictions, results.predictions, task, len(class_dict))

    f1s_soft_LR.append( f1_score(author_predictions_LR['true'], author_predictions_LR['pred_soft'], average = 'macro') )
    f1s_hard_LR.append( f1_score(author_predictions_LR['true'], author_predictions_LR['pred_hard'], average = 'macro') )

    # report metrics

    report_LR = {
        'soft': classification_report(author_predictions_LR['true'], author_predictions_LR['pred_soft'], digits=4),
        'hard': classification_report(author_predictions_LR['true'], author_predictions_LR['pred_hard'], digits=4),
    }

    print("Results with split " + str(split + 1) + " using LOGISTIC REGRESSION:\n")
    print("soft voting:\n", report_LR['soft'], '\n')
    print("hard voting:\n", report_LR['hard'])


    # save predictions ----------------------------------------------------

    DIR = 'results/' + _DATASET_ + '/' + _LANGUAGE_ + '/' + _PRED_DIR_ + '/' + str(_NUM_AUTHORS_) + '_authors/test_split_' + str(split + 1) + '/'
    # exist_ok avoids the check-then-create race and makes re-runs safe.
    os.makedirs(DIR, exist_ok=True)

    with open(DIR + 'predictions.pickle', 'wb') as f:
        pickle.dump(author_predictions, f)

    with open(DIR + 'predictions_LR.pickle', 'wb') as f:
        pickle.dump(author_predictions_LR, f)

    with open(DIR + 'report.txt', 'w') as f:
        f.write("soft voting:\n" + report['soft'] + '\n\n')
        f.write("hard voting:\n" + report['hard'])

    with open(DIR + 'report_LR.txt', 'w') as f:
        f.write("soft voting:\n" + report_LR['soft'] + '\n\n')
        f.write("hard voting:\n" + report_LR['hard'])
    


loading configuration file https://huggingface.co/ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/bf704e14bcd921d2d4cfcad78a3add263a85a5d067122102d3add0fb620085c7.88e321f78373dda73f5c421340751fd102e1cf513f3e985ac0ca9a0865c4e94a
Model config BartConfig {
  "_name_or_path": "ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForSequenceClassification"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop":

Step,Training Loss
40,3.1033
80,3.0835
120,2.8298
160,2.8776
200,2.8328
240,2.8278
280,2.908
320,2.6772
360,2.8387
400,2.5648


Saving model checkpoint to checkPointsNLI/checkpoint-500
Configuration saved in checkPointsNLI/checkpoint-500/config.json
Model weights saved in checkPointsNLI/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 105600
  Batch size = 200


acc: 0.5: 100%|███████████████████████████████████████████████████████████| 2640/2640 [01:05<00:00, 40.03it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Prediction *****
  Num examples = 640
  Batch size = 200


Results with split 1:

soft voting:
               precision    recall  f1-score   support

           0     0.5000    1.0000    0.6667      1320
           1     0.0000    0.0000    0.0000      1320

    accuracy                         0.5000      2640
   macro avg     0.2500    0.5000    0.3333      2640
weighted avg     0.2500    0.5000    0.3333      2640
 

hard voting:
               precision    recall  f1-score   support

           0     0.5000    1.0000    0.6667      1320
           1     0.0000    0.0000    0.0000      1320

    accuracy                         0.5000      2640
   macro avg     0.2500    0.5000    0.3333      2640
weighted avg     0.2500    0.5000    0.3333      2640



acc: 0.7106060606060606: 100%|████████████████████████████████████████████| 2640/2640 [01:08<00:00, 38.61it/s]


Results with split 1 using LOGISTIC REGRESSION:

soft voting:
               precision    recall  f1-score   support

           0     0.6928    0.7568    0.7234      1320
           1     0.7321    0.6644    0.6966      1320

    accuracy                         0.7106      2640
   macro avg     0.7124    0.7106    0.7100      2640
weighted avg     0.7124    0.7106    0.7100      2640
 

hard voting:
               precision    recall  f1-score   support

           0     0.6764    0.8076    0.7362      1320
           1     0.7613    0.6136    0.6795      1320

    accuracy                         0.7106      2640
   macro avg     0.7188    0.7106    0.7079      2640
weighted avg     0.7188    0.7106    0.7079      2640



loading configuration file https://huggingface.co/ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/bf704e14bcd921d2d4cfcad78a3add263a85a5d067122102d3add0fb620085c7.88e321f78373dda73f5c421340751fd102e1cf513f3e985ac0ca9a0865c4e94a
Model config BartConfig {
  "_name_or_path": "ynie/bart-large-snli_mnli_fever_anli_R1_R2_R3-nli",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForSequenceClassification"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop":

Step,Training Loss
40,3.031


In [None]:
# Report statistics: show the per-split macro-F1 scores collected by the
# training loop, then their mean and standard deviation per voting scheme.

print('Soft results: ', f1s_soft)
print('\nHard results: ', f1s_hard)

# Rebind the score lists as NumPy arrays (gives access to .mean() / .std()).
f1s_soft, f1s_hard = np.array(f1s_soft), np.array(f1s_hard)

# voting scheme -> [average, standard deviation] over the splits
FewShot_Results = {
    scheme: [scores.mean(), scores.std()]
    for scheme, scores in (('soft', f1s_soft), ('hard', f1s_hard))
}

for _heading, _scheme in (('\n\nSoft statistics: ', 'soft'), ('\nHard statistics: ', 'hard')):
    print(_heading)
    print('\t[avg, std]:', FewShot_Results[_scheme])

In [None]:
# Report statistics (logistic-regression variant): show the per-split macro-F1
# scores, then their mean and standard deviation per voting scheme.

print('Soft results: ', f1s_soft_LR)
print('\nHard results: ', f1s_hard_LR)

# Rebind the score lists as NumPy arrays (gives access to .mean() / .std()).
f1s_soft_LR, f1s_hard_LR = np.array(f1s_soft_LR), np.array(f1s_hard_LR)

# voting scheme -> [average, standard deviation] over the splits
FewShot_Results_LR = {
    scheme: [scores.mean(), scores.std()]
    for scheme, scores in (('soft', f1s_soft_LR), ('hard', f1s_hard_LR))
}

for _heading, _scheme in (
    ('\n\nSoft statistics with LOGISTIC REGRESSION: ', 'soft'),
    ('\nHard statistics with LOGISTIC REGRESSION: ', 'hard'),
):
    print(_heading)
    print('\t[avg, std]:', FewShot_Results_LR[_scheme])