# BERTimbau Classifier - Training and Validation

## Imports

In [1]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, matthews_corrcoef
import pandas as pd
import time as t
import statistics


2021-10-14 19:05:42.625646: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


## Reading Datasets

In [2]:
# Characters removed from the end of every question: newlines, tabs and the
# final '?' / '.' punctuation.
TRAILING_NOISE = '\n\t?.'


def clean_questions(questions: pd.Series) -> pd.Series:
    """Return `questions` with trailing newlines, tabs, '?' and '.' removed.

    All unwanted characters are stripped in a single pass, so the result does
    not depend on the order in which they appear at the end of a question
    (e.g. both "...?." and "....?" end up without any trailing punctuation).
    Missing values are passed through unchanged by the `.str` accessor.
    """
    return questions.str.rstrip(TRAILING_NOISE)


def read_spider_questions(path: str) -> pd.DataFrame:
    """Read one header-less SPIDER text file using ';;' as the separator.

    A multi-character separator is only supported by pandas' Python parser;
    requesting it explicitly avoids the ParserWarning pandas emits when it has
    to fall back from the C engine on its own.
    """
    return pd.read_csv(path, sep=";;", header=None, engine="python")


# Loading the MSMARCO Dataset
MSMARCO = pd.read_csv("MSMARCO_biomedical_PT/train.csv")[["question_pt"]]

# Cleaning MSMARCO Dataset
MSMARCO['question_pt'] = clean_questions(MSMARCO['question_pt'])


# Loading the MIMIC Dataset
MIMIC_dev = pd.read_csv("MIMICSQL_PT/dev.csv")[["question_pt"]]
MIMIC_train = pd.read_csv("MIMICSQL_PT/train.csv")[["question_pt"]]

MIMIC = pd.concat([MIMIC_dev, MIMIC_train], ignore_index=True)

# Cleaning MIMIC Dataset
MIMIC['question_pt'] = clean_questions(MIMIC['question_pt'])


# Loading the SPIDER Dataset
SPIDER_med = read_spider_questions("SPIDER/medicine_enzyme_interaction.txt")
SPIDER_protein = read_spider_questions("SPIDER/protein_institute.txt")
SPIDER_scientist = read_spider_questions("SPIDER/scientist_1.txt")

SPIDER = pd.concat([SPIDER_med, SPIDER_protein, SPIDER_scientist], ignore_index=True)
# The files carry no header row; give the single parsed column the same name
# the other datasets use.
SPIDER.columns = ["question_pt"]

# Cleaning SPIDER Dataset
SPIDER['question_pt'] = clean_questions(SPIDER['question_pt'])

  return func(*args, **kwargs)


## Joining Datasets

In [3]:
# Join datasets

# Binary target column: rows coming from MSMARCO get 1, rows coming from
# MIMIC and SPIDER get 0.
MSMARCO["type"] = 1
MIMIC["type"] = 0
SPIDER["type"] = 0

# All type-0 questions in a single frame.
tp0 = pd.concat([MIMIC, SPIDER], ignore_index=True)

# Keep only as many MSMARCO rows (taken from the top of the frame) as there
# are type-0 rows, so both classes have the same size.
# `len(tp0)` replaces `tp0.count()[0]`, which looked up a label-indexed Series
# by integer position (deprecated in recent pandas).
df = pd.concat([MSMARCO.head(len(tp0)), tp0], ignore_index=True)


## Cross Validation Training

In [4]:
tic = t.time()

# One seed for both the fold splitter and the model, so that re-running the
# cell reproduces the same folds and (as far as the libraries allow) the same
# training runs.
SEED = 42

# Hyperparameters handed to simpletransformers for every fold.
model_args = {
    'num_train_epochs': 5,
    'learning_rate': 5e-5,
    'max_seq_length': 512,
    'n_gpu': 2,  # NOTE(review): assumes two GPUs are available - adjust for other hardware
    'optimizer': 'AdamW',
    'train_batch_size': 4,
    'gradient_accumulation_steps': 8,
    'overwrite_output_dir': True,
    'manual_seed': SEED,
}

# Prepare cross validation
skf = StratifiedKFold(n_splits=10, random_state=SEED, shuffle=True)

# Per-fold score histories (read again by the "final results" cell).
results_acc = []
results_mcc = []
results_f1_weighted = []
results_f1_micro = []
results_f1_macro = []
results_eval_loss = []

# (label of the accumulated list, label of its running average, the list).
# The labels are the exact strings this cell has always printed.
tracked_scores = [
    ('results_acc', 'avg_acc', results_acc),
    ('results_mcc', 'avg_mcc', results_mcc),
    ('results_f1_weighted', 'avg_f1_weighted', results_f1_weighted),
    ('results_f1_micro', 'avg_f1_micro', results_f1_micro),
    ('results_f1_macro', 'avg_f1_macro', results_f1_macro),
    ('results_eval_loss', 'avg_results_eval_loss', results_eval_loss),
]

# simpletransformers looks for 'text' / 'labels' columns; naming them
# explicitly avoids its positional "column 0 / column 1" fallback.
fold_columns = {'question_pt': 'text', 'type': 'labels'}

for train_index, val_index in skf.split(X=df['question_pt'], y=df['type']):
    # Splitting Dataframe (dataset not included)
    train_df = df.iloc[train_index].rename(columns=fold_columns)
    val_df = df.iloc[val_index].rename(columns=fold_columns)

    # Defining and training the model (a fresh model for every fold)
    model = ClassificationModel('bert', 'neuralmind/bert-base-portuguese-cased', num_labels=2, args=model_args)
    model.train_model(train_df)

    # Validate the model. `model_outputs` holds one row of raw class scores per
    # validation example, so the predicted class is the arg-max of each row;
    # this avoids running the validation fold through the model a second time.
    result, model_outputs, wrong_predictions = model.eval_model(val_df)
    preds = model_outputs.argmax(axis=1)
    y_true = val_df['labels'].values

    acc = accuracy_score(y_true, preds)
    mcc = matthews_corrcoef(y_true, preds)
    f1_weighted = f1_score(y_true, preds, average='weighted')
    f1_micro = f1_score(y_true, preds, average='micro')
    f1_macro = f1_score(y_true, preds, average='macro')

    # Append model scores
    results_acc.append(acc)
    results_mcc.append(mcc)
    results_f1_weighted.append(f1_weighted)
    results_f1_micro.append(f1_micro)
    results_f1_macro.append(f1_macro)
    results_eval_loss.append(result['eval_loss'])

    print('\n################# RESULTS #################')
    print('\nAccumulated data:')
    for results_label, _, history in tracked_scores:
        print('>> ' + results_label, history)

    # Running averages over the folds finished so far.
    print('\nAVGs:')
    for _, avg_label, history in tracked_scores:
        print('>> ' + avg_label + ':', round(sum(history) / len(history), 3))


tac = t.time()
duration = round((tac - tic)/60, 2)
print('\n>> Elapsed time: {}min\n'.format(duration))

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16401 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1823 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1823 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333]
>> results_mcc [0.9989035074529009]
>> results_f1_weighted [0.9994514533177142]
>> results_f1_micro [0.9994514536478333]
>> results_f1_macro [0.9994514529875953]
>> results_eval_loss [0.0019295816069771128]

AVGs:
>> avg_acc: 0.999
>> avg_mcc: 0.999
>> avg_f1_weighted: 0.999
>> avg_f1_micro: 0.999
>> avg_f1_macro: 0.999
>> avg_results_eval_loss: 0.002


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16401 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1823 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1823 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0]
>> results_mcc [0.9989035074529009, 1.0]
>> results_f1_weighted [0.9994514533177142, 1.0]
>> results_f1_micro [0.9994514536478333, 1.0]
>> results_f1_macro [0.9994514529875953, 1.0]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278]

AVGs:
>> avg_acc: 1.0
>> avg_mcc: 0.999
>> avg_f1_weighted: 1.0
>> avg_f1_micro: 1.0
>> avg_f1_macro: 1.0
>> avg_results_eval_loss: 0.001


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16401 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1823 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1823 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278, 0.007889651173850883]

AVGs:
>> avg_acc: 0.999
>> avg_mcc: 0.999
>> avg_f1_weighted: 0.999
>> avg_f1_micro: 0.999
>> avg_f1_macro: 0.999
>> avg_results_eval_loss: 0.003


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16401 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1823 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1823 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664, 1.0]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941, 1.0]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495, 1.0]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664, 1.0]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911, 1.0]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278, 0.007889651173850883, 3.061985977949103e-05]

AVGs:
>> avg_acc: 1.0
>> avg_mcc: 0.999
>> avg_f1_weighted: 1.0
>> avg_f1_micro: 1.0
>> avg_f1_macro: 1.0
>> avg_results_eval_loss: 0.002


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16402 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1822 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1822 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941, 1.0, 0.9989029069653665]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495, 1.0, 0.9994511524142516]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911, 1.0, 0.9994511524142516]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278, 0.007889651173850883, 3.061985977949103e-05, 0.005370998285893859]

AVGs:
>> avg_acc: 1.0
>> avg_mcc: 0.999
>> avg_f1_weighted: 1.0
>> avg_f1_micro: 1.0
>> avg_f1_macro: 1.0
>> avg_results_eval_loss: 0.003


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16402 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1822 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1822 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941, 1.0, 0.9989029069653665, 0.9989029069653665]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495, 1.0, 0.9994511524142516, 0.9994511524142516]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911, 1.0, 0.9994511524142516, 0.9994511524142516]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278, 0.007889651173850883, 3.061985977949103e-05, 0.005370998285893859, 0.00641160039910111]

AVGs:
>> avg_acc: 1.0
>> avg_mcc: 0.999
>> avg_f1_weighted: 1.0
>> avg_f1_micro: 1.0
>> avg_f1_macro: 1.0
>> avg_results_eval_loss: 0.004


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16402 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1822 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1822 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941, 1.0, 0.9989029069653665, 0.9989029069653665, 0.996707515961047]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278, 0.007889651173850883, 3.061985977949103e-05, 0.005370998285893859, 0.00641160039910111, 0.010651623390322927]

AVGs:
>> avg_acc: 0.999
>> avg_mcc: 0.999
>> avg_f1_weighted: 0.999
>> avg_f1_micro: 0.999
>> avg_f1_macro: 0.

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16402 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1822 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1822 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486, 1.0]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941, 1.0, 0.9989029069653665, 0.9989029069653665, 0.996707515961047, 1.0]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548, 1.0]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486, 1.0]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548, 1.0]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278, 0.007889651173850883, 3.061985977949103e-05, 0.005370998285893859, 0.00641160039910111, 0.010651623390322927, 0.00022643909352681427]

AVGs:
>> avg_acc: 0.999
>> avg_mcc: 0.999
>> avg_f1_weighted:

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16402 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1822 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1822 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486, 1.0, 0.9989023051591658]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941, 1.0, 0.9989029069653665, 0.9989029069653665, 0.996707515961047, 1.0, 0.9978070149043516]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548, 1.0, 0.9989023038365143]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486, 1.0, 0.9989023051591658]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548, 1.0, 0.9989023038365143]
>> results_eval_loss [0.0019295816069771128, 0.00014591706853376278, 0.007889651173850883, 3.061985977949103e-05, 0.005370998285893859, 0.00641160039910111, 0.010651

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/16402 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4101 [00:00<?, ?it/s]



  0%|          | 0/1822 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1822 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]




################# RESULTS #################

Accumulated data:
>> results_acc [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486, 1.0, 0.9989023051591658, 0.9989023051591658]
>> results_mcc [0.9989035074529009, 1.0, 0.997808213248941, 1.0, 0.9989029069653665, 0.9989029069653665, 0.996707515961047, 1.0, 0.9978070149043516, 0.9978070149043516]
>> results_f1_weighted [0.9994514533177142, 1.0, 0.9989029053149495, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548, 1.0, 0.9989023038365143, 0.9989023038365143]
>> results_f1_micro [0.9994514536478333, 1.0, 0.9989029072956664, 1.0, 0.9994511525795828, 0.9994511525795828, 0.9983534577387486, 1.0, 0.9989023051591658, 0.9989023051591658]
>> results_f1_macro [0.9994514529875953, 1.0, 0.9989029043245911, 1.0, 0.9994511524142516, 0.9994511524142516, 0.9983534572427548, 1.0, 0.9989023038365143, 0.9989023038365143]
>> results_eval_loss [0.0019295816069771128, 0.000145917068533762

## Check final results

In [5]:
print('Final results with std')

# Per-fold score lists from the cross-validation cell, keyed by the label
# under which each one is reported.
scores_by_metric = {
    'acc': results_acc,
    'mcc': results_mcc,
    'f1_weighted': results_f1_weighted,
    'f1_micro': results_f1_micro,
    'f1_macro': results_f1_macro,
    'eval_loss': results_eval_loss,
}

# Report mean and sample standard deviation across folds, rounded to 3 decimals.
for metric_name, fold_scores in scores_by_metric.items():
    mean_score = round(statistics.mean(fold_scores), 3)
    spread = round(statistics.stdev(fold_scores), 3)
    print('>> {}: {} +/- {}'.format(metric_name, mean_score, spread))

Final results with std
>> acc: 0.999 +/- 0.001
>> mcc: 0.999 +/- 0.001
>> f1_weighted: 0.999 +/- 0.001
>> f1_micro: 0.999 +/- 0.001
>> f1_macro: 0.999 +/- 0.001
>> eval_loss: 0.005 +/- 0.004


## Training with the entire dataset

In [6]:
# Hyperparameters for the final model, which is fitted on every row of `df`
# (no validation split is held out in this cell).
model_args = dict(
    num_train_epochs=5,
    learning_rate=5e-5,
    max_seq_length=512,
    n_gpu=2,
    optimizer='AdamW',
    train_batch_size=4,
    gradient_accumulation_steps=8,
    overwrite_output_dir=True,
)

# Defining and training the model
model = ClassificationModel(
    'bert',
    'neuralmind/bert-base-portuguese-cased',
    num_labels=2,
    args=model_args,
)
# Left as the last expression so the notebook displays the value returned by
# train_model.
model.train_model(df)

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

  0%|          | 0/18224 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/4556 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/4556 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/4556 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/4556 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/4556 [00:00<?, ?it/s]

(2845, 0.01409631805715618)