# Single BERT Sample

> https://medium.com/towards-artificial-intelligence/text-classification-with-simple-transformers-a29d13358135



In [1]:
# Colab environment setup: upgrade transformers and install simpletransformers.
# NOTE(review): neither package is version-pinned, so a fresh run may resolve
# different releases than the recorded one (the captured output below shows
# transformers 4.2.1).
!pip install --upgrade transformers
!pip install simpletransformers

Requirement already up-to-date: transformers in /usr/local/lib/python3.6/dist-packages (4.2.1)
Collecting watchdog<0.10.5,>=0.8.3
[?25l  Downloading https://files.pythonhosted.org/packages/6f/10/500580a0987363a0d9e1f3dd5cb1bba94a47e19266c6ce9dfb6cdd455758/watchdog-0.10.4.tar.gz (98kB)
[K     |████████████████████████████████| 102kB 6.3MB/s 
Collecting pathtools>=0.1.1
  Downloading https://files.pythonhosted.org/packages/e7/7f/470d6fcdf23f9f3518f6b0b76be9df16dcc8630ad409947f8be2eb0ed13a/pathtools-0.1.2.tar.gz
Building wheels for collected packages: watchdog, pathtools
  Building wheel for watchdog (setup.py) ... [?25l[?25hdone
  Created wheel for watchdog: filename=watchdog-0.10.4-cp36-none-any.whl size=74842 sha256=68fc6bde121e8c42c37cdb83fa519b6530fdf4a6bde18f32d03b8d5596dde526
  Stored in directory: /root/.cache/pip/wheels/9e/11/04/5160b8815b0cc7cf574bdc6d053e510169ec264c8791b4ec3a
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone
  Created wheel for pathtools: file

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sklearn.metrics import classification_report, confusion_matrix
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
import wandb
import logging
from google.colab import files
import json
import gc
import time

In [4]:
# Keep the "transformers" logger at WARNING while the root logger is configured
# at INFO (the simpletransformers INFO lines in the cell outputs come from that).
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

In [5]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    ``x`` may be a scalar or a NumPy array; the computation is element-wise
    because it is delegated to ``np.exp``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def show_confusion_matrix(confusion_matrix, name_model, file_matriz_name):
    """Draw a confusion matrix as an annotated heatmap and save it to a file.

    Parameters
    ----------
    confusion_matrix
        Table of integer counts to plot (cells are annotated with the ``"d"``
        integer format). Its row/column labels become the tick labels.
    name_model
        Label appended to the plot title; converted with ``str``.
    file_matriz_name
        Destination path handed to ``savefig``.

    Returns
    -------
    None
        The figure is written to ``file_matriz_name`` and then closed.
    """
    # Use a dedicated figure/axes instead of the implicit "current" pyplot
    # figure, so leftovers from earlier plotting cannot end up in the image.
    fig, ax = plt.subplots()
    try:
        hmap = sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
        hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
        hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=30, ha='right')
        ax.set_title('Matriz de Confusão: ' + str(name_model))
        ax.set_ylabel('Classificação Real')
        ax.set_xlabel('Classificação Predita')
        fig.savefig(file_matriz_name)
    finally:
        # Always release the figure, even if plotting or saving failed; this
        # function is called once per trained model inside a loop.
        plt.close(fig)

In [6]:
class arguments:
    """Plain container for one training configuration.

    Attributes (all stored exactly as passed in):
      model_name    -- identifier used in log lines and output file names
      batch_size    -- training batch size
      learning_rate -- optimizer learning rate
      epochs        -- number of training epochs
      model_type    -- configuration family; the training loop checks it
                       against 'Fine_Tuning'
    """

    def __init__(self, model_name, batch_size, learning_rate, epochs, model_type):
        self.model_name, self.batch_size = model_name, batch_size
        self.learning_rate, self.epochs = learning_rate, epochs
        self.model_type = model_type

# Global configuration used by the later cells.
SEED = 42
class_names = ['negativa', 'positiva']

# Hyper-parameter grid: every combination of batch size, learning rate and
# epoch count becomes one 'Fine_Tuning' configuration named Model_<n>.
ARR_BATCH_SIZE = [16, 32]
ARR_LEARNING_RATE = [5e-5, 3e-5, 2e-5]
ARR_EPOCHS = [2, 3, 4, 5]

# A second grid with model_type 'Default' (batch size and learning rate passed
# as 0, epochs in [2, 3, 5, 6, 7, 10]) is currently disabled; the training loop
# still has a branch for that model type.

list_arguments = []
I_MODEL = 0

# product() iterates with the last list varying fastest, i.e. the same order
# as three nested loops over batch size -> learning rate -> epochs.
grid = itertools.product(ARR_BATCH_SIZE, ARR_LEARNING_RATE, ARR_EPOCHS)
for I_MODEL, (batch_size, lr, epoch) in enumerate(grid, start=1):
  model_name = "Model_" + str(I_MODEL)
  model_type = 'Fine_Tuning'
  list_arguments.append(arguments(model_name, batch_size, lr, epoch, model_type))

# Echo the generated configurations.
for params in list_arguments:
    print(
        'Modelo: ' + params.model_name,
        ' | Batch Size: ' + str(params.batch_size),
        ' | Learning Rate: ' + str(params.learning_rate),
        ' | Épocas: ' + str(params.epochs),
        sep=' ',
    )

Modelo: Model_1  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 2
Modelo: Model_2  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 3
Modelo: Model_3  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 4
Modelo: Model_4  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 5
Modelo: Model_5  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 2
Modelo: Model_6  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 3
Modelo: Model_7  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 4
Modelo: Model_8  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 5
Modelo: Model_9  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 2
Modelo: Model_10  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 3
Modelo: Model_11  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 4
Modelo: Model_12  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 5
Modelo: Model_13  | Batch Size: 32  | Learning Rate: 5e-05  | Épocas: 2
Modelo: Model_14  | Batch Size: 32  | Learning Rate: 5e-05  | Épocas: 3
M

In [7]:
from google.colab import drive

# Mount Google Drive; the dataset is read from the mounted folder.
drive.mount('/content/drive', force_remount=True)

file_path = "/content/drive/My Drive/data_full.csv"

# Missing cells are turned into None. `np.nan` is used instead of the `np.NaN`
# alias, which was removed in NumPy 2.0 and raises AttributeError there.
data = pd.read_csv(file_path, sep=';').replace({np.nan: None})
# The CSV is expected to have exactly these six columns, in this order
# (the assignment fails if the column count differs).
data.columns = ['sentenca','entidade1','entidade1_tipo','relacao','entidade2','entidade2_tipo']

# "entidade1 , entidade2" as a single text field; a missing entity is
# rendered through str(), i.e. as the text 'None'.
cols = ['entidade1', 'entidade2']
data['entidades'] = data[cols].apply(lambda row: ' , '.join(row.values.astype(str)), axis=1)

# Binary target: 1 when the row has a relation, 0 when 'relacao' is missing.
# notna() treats both None and NaN as missing, so the labels stay correct even
# if some NaN is not converted to None by the replace above.
data['class'] = data['relacao'].notna().astype(int)

data.head()

Mounted at /content/drive


Unnamed: 0,sentenca,entidade1,entidade1_tipo,relacao,entidade2,entidade2_tipo,entidades,class
0,World Alliance of Reformed Churches condena a ...,Iraque,LOC,condena a guerra no,World Alliance of,ORG,"Iraque , World Alliance of",1
1,Em 19 de Dezembro de 1974 foi assinado um acor...,PAIGC,ORG,acordo entre,Portugal,LOC,"PAIGC , Portugal",1
2,Em 19 de Dezembro de 1974 foi assinado um acor...,PAIGC,ORG,,Cabo Verde,LOC,"PAIGC , Cabo Verde",0
3,Em 19 de Dezembro de 1974 foi assinado um acor...,Portugal,LOC,,Cabo Verde,LOC,"Portugal , Cabo Verde",0
4,"A 88i , plataforma de serviços digitais para s...",88i,ORG,foi aprovada no,Global Startup Program,ORG,"88i , Global Startup Program",1


In [None]:
# Model input text: the sentence followed by its entity pair, joined by ' , '.
# Both parts go through str(), so a missing sentence becomes the text 'None'.
data['sentence'] = [
    ' , '.join((str(sentenca), str(entidades)))
    for sentenca, entidades in zip(data['sentenca'], data['entidades'])
]
# From here on only the text and the label are kept.
data = data[['sentence', 'class']]
data.head()

Unnamed: 0,sentence,class
0,World Alliance of Reformed Churches condena a ...,1
1,Em 19 de Dezembro de 1974 foi assinado um acor...,1
2,Em 19 de Dezembro de 1974 foi assinado um acor...,0
3,Em 19 de Dezembro de 1974 foi assinado um acor...,0
4,"A 88i , plataforma de serviços digitais para s...",1


In [None]:
# Stratified split in two steps: hold out 20% of the rows, then cut that
# hold-out in half to obtain the test and validation sets (80/10/10 overall).
train_df, holdout_df = train_test_split(
    data, test_size=0.2, stratify=data['class'], random_state=SEED
)
test_df, valid_df = train_test_split(
    holdout_df, test_size=0.5, stratify=holdout_df['class'], random_state=SEED
)

for split_label, split_frame in (
    ("train_df:", train_df),
    ("test_df:", test_df),
    ("valid_df:", valid_df),
):
    print(split_label, split_frame.shape)

train_df: (2630, 2)
test_df: (329, 2)
valid_df: (329, 2)


In [None]:
# Arguments object with the library defaults; the training loop assigns the
# per-run settings on it.
model_args = ClassificationArgs()

# Names reserved for the three dataset splits.
file_train_dataset_name = "train_dataset.csv"
file_validation_dataset_name = "validation_dataset.csv"
file_test_dataset_name = "test_dataset.csv"

# Running list of file names; the training loop adds its per-model artefacts.
# NOTE(review): presumably used later to collect/download the files - confirm.
arr_file_names = [
    file_train_dataset_name,
    file_validation_dataset_name,
    file_test_dataset_name,
]

In [None]:
# Train, validate and test one model per configuration in `list_arguments`.
# For every configuration this writes four artefacts to the working directory:
# metrics JSON, confusion-matrix PNG, test predictions CSV and the full args.

# Give the test split a clean 0..n-1 index once (instead of mutating it in
# place on every iteration); the predictions CSV is written with this index.
test_df = test_df.reset_index(drop=True)

for params in list_arguments:
  # Fresh arguments per configuration. Reusing a single object lets settings
  # assigned for one run linger in the next: a 'Default' run would silently
  # inherit the previous run's learning rate and batch size.
  model_args = ClassificationArgs()
  model_args.manual_seed = SEED
  model_args.num_train_epochs = params.epochs
  model_args.overwrite_output_dir = True
  # NOTE(review): these two presumably turn off intermediate checkpoints - confirm
  # against the simpletransformers documentation.
  model_args.save_steps = -1
  model_args.save_model_every_epoch = False

  if params.model_type == 'Fine_Tuning':
    print('Modelo: ' + params.model_name, ' | Batch Size: ' + str(params.batch_size), ' | Learning Rate: ' + str(params.learning_rate), ' | Épocas: ' + str(params.epochs), sep = ' ' )
    model_args.learning_rate = params.learning_rate
    model_args.train_batch_size = params.batch_size
  else:
    # Any other model type keeps the library's learning rate and batch size.
    print('Modelo: ' + params.model_name, ' | Épocas: ' + str(params.epochs), sep = ' ' )

  model = ClassificationModel(
    model_type='bert',
    model_name='neuralmind/bert-large-portuguese-cased',
    use_cuda=True,
    args=model_args
  )

  file_metrics_name = "metrics_" + params.model_name + ".json"
  file_matriz_name = "matriz_" + params.model_name + ".png"
  file_predictions_name = "predictions_" + params.model_name + ".csv"
  file_full_args_name = "full_args_" + params.model_name + ".txt"

  # Register the artefacts once, so re-running this cell does not add duplicates.
  for artifact_name in (file_metrics_name, file_matriz_name, file_predictions_name, file_full_args_name):
    if artifact_name not in arr_file_names:
      arr_file_names.append(artifact_name)

  log_prefix = ">>>>> " + str(params.model_name)

  print(log_prefix + ": Treinando modelo")
  model.train_model(train_df)
  print(log_prefix + ": Validando com dados de Validação")
  result, model_outputs, wrong_preds = model.eval_model(valid_df)

  print(log_prefix + ": Calculando F-Score de Validação")
  # Predicted class = index of the largest raw output. The sigmoid is
  # monotonic, so the arg-max after it selects the same class; both are kept
  # because both scores are reported.
  predictions = [np.argmax(x) for x in model_outputs]
  sigmoid_preds = np.array([np.argmax(sigmoid(x)) for x in model_outputs])

  f1_validation_argmax = round(f1_score(valid_df['class'], predictions), 4)
  f1_validation_sigmoid = round(f1_score(valid_df['class'], sigmoid_preds), 4)
  print('>>>>>>>>>> Arg-Max F1-score Validation:', f1_validation_argmax)
  print('>>>>>>>>>> Sigmoide F1-score Validation:', f1_validation_sigmoid)

  print(log_prefix + ": Predizendo valores")
  # predict() is given a plain list of texts rather than a pandas Series.
  test_predictions, raw_outputs = model.predict(test_df['sentence'].tolist())

  print(log_prefix + ": Métricas")
  print(classification_report(test_df['class'], test_predictions, target_names=class_names))

  report = classification_report(test_df['class'], test_predictions, target_names=class_names, output_dict=True)
  report['F1_score_Test'] = round(f1_score(test_df['class'], test_predictions), 4)
  report['F1_score_Validation'] = f1_validation_argmax

  print(log_prefix + ": Salvando Métricas em JSON")

  with open(file_metrics_name, 'w') as outfile:
    json.dump(report, outfile)

  print(log_prefix + ": Gerando matriz de confusão e salvando")
  cnf_matrix = confusion_matrix(test_df['class'], test_predictions)
  df_cm = pd.DataFrame(cnf_matrix, index=class_names, columns=class_names)

  show_confusion_matrix(df_cm, str(params.model_name) + ' - ' + str(params.model_type), file_matriz_name)

  print(log_prefix + ": Salvando Arquivo com predições de Dados de Teste")
  df_final = pd.DataFrame({
    'sentence': test_df['sentence'],
    'class': test_df['class'],
    'predicted_class': test_predictions,
  })

  df_final.to_csv(file_predictions_name, sep=';', encoding='utf-8-sig')

  # Context manager so the file is closed even if writing fails.
  with open(file_full_args_name, "w") as text_file:
    text_file.write(str(model_args))

  print(">>>>>>>>>>>>>>> " + str(params.model_name) + ": Fim")

INFO:filelock:Lock 140462844925936 acquired on /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5.lock


Modelo: Model_1  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 2


Downloading:   0%|          | 0.00/648 [00:00<?, ?B/s]

INFO:filelock:Lock 140462844925936 released on /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5.lock
INFO:filelock:Lock 140463756874752 acquired on /root/.cache/huggingface/transformers/016fb7702039667c9fb9dd2ceffaf04027b13e525a6248cda2a4a87dbb8687af.881d7200bce807f871637ac9d552c541b2d4b00146a0bf1ab0360f3640031273.lock


Downloading:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

INFO:filelock:Lock 140463756874752 released on /root/.cache/huggingface/transformers/016fb7702039667c9fb9dd2ceffaf04027b13e525a6248cda2a4a87dbb8687af.881d7200bce807f871637ac9d552c541b2d4b00146a0bf1ab0360f3640031273.lock
Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of

Downloading:   0%|          | 0.00/210k [00:00<?, ?B/s]

INFO:filelock:Lock 140462699578928 released on /root/.cache/huggingface/transformers/9cfcd25de0a333b1b5f4a3db227e93a806cfb041d93a49221eeaee6773eaa41c.af25fb1e29ad0175300146695fd80069be69b211c52fa5486fa8aae2754cc814.lock
INFO:filelock:Lock 140462698997352 acquired on /root/.cache/huggingface/transformers/6a3aa038873b8f0d0ab3a4de0a658f063b89e3afd815920a5f393c0e4ae84259.5cc6e825eb228a7a5cfd27cb4d7151e97a79fb962b31aaf1813aa102e746584b.lock


Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

INFO:filelock:Lock 140462698997352 released on /root/.cache/huggingface/transformers/6a3aa038873b8f0d0ab3a4de0a658f063b89e3afd815920a5f393c0e4ae84259.5cc6e825eb228a7a5cfd27cb4d7151e97a79fb962b31aaf1813aa102e746584b.lock
INFO:filelock:Lock 140462843694720 acquired on /root/.cache/huggingface/transformers/d5b721c156180bbbcc4a1017e8c72a18f8f96cdc178acec5ddcd45905712b4cf.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d.lock


Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

INFO:filelock:Lock 140462843694720 released on /root/.cache/huggingface/transformers/d5b721c156180bbbcc4a1017e8c72a18f8f96cdc178acec5ddcd45905712b4cf.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d.lock
INFO:filelock:Lock 140462843694720 acquired on /root/.cache/huggingface/transformers/3a44fa9a74e90f509368a7f2789df38e1fedd153a52c62ef5cc5f4b0f5c99c2a.d61b68f744aef2741575c270d4ba0228cd35693bfa15d8babfb5c1079062d5d7.lock


Downloading:   0%|          | 0.00/155 [00:00<?, ?B/s]

INFO:filelock:Lock 140462843694720 released on /root/.cache/huggingface/transformers/3a44fa9a74e90f509368a7f2789df38e1fedd153a52c62ef5cc5f4b0f5c99c2a.d61b68f744aef2741575c270d4ba0228cd35693bfa15d8babfb5c1079062d5d7.lock


>>>>> Model_1: Treinando modelo


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_1: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6227615161700001, 'tp': 122, 'tn': 145, 'fp': 36, 'fn': 26, 'auroc': 0.8793489622218903, 'auprc': 0.8675708774134019, 'eval_loss': 0.4347010354200999}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_1: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7974
>>>>>>>>>> Sigmoide F1-score Validation: 0.7974
>>>>> Model_1: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_1: Métricas
              precision    recall  f1-score   support

    negativa       0.82      0.72      0.77       180
    positiva       0.71      0.81      0.75       149

    accuracy                           0.76       329
   macro avg       0.76      0.76      0.76       329
weighted avg       0.77      0.76      0.76       329

>>>>> Model_1: Salvando Métricas em JSON
>>>>> Model_1: Gerando matriz de confusão e salvando
>>>>> Model_1: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_1: Fim
Modelo: Model_2  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 3


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_2: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_2: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
INFO:simpletransformers.classification.classification_model:{'mcc': 0.0, 'tp': 0, 'tn': 181, 'fp': 0, 'fn': 148, 'auroc': 0.5089405704046588, 'auprc': 0.45687938026142944, 'eval_loss': 0.6906162017867679}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_2: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.0
>>>>>>>>>> Sigmoide F1-score Validation: 0.0
>>>>> Model_2: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_2: Métricas
              precision    recall  f1-score   support

    negativa       0.55      1.00      0.71       180
    positiva       0.00      0.00      0.00       149

    accuracy                           0.55       329
   macro avg       0.27      0.50      0.35       329
weighted avg       0.30      0.55      0.39       329

>>>>> Model_2: Salvando Métricas em JSON
>>>>> Model_2: Gerando matriz de confusão e salvando
>>>>> Model_2: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_2: Fim
Modelo: Model_3  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 4


  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

>>>>> Model_3: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_3: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.0, 'tp': 0, 'tn': 181, 'fp': 0, 'fn': 148, 'auroc': 0.47754591608182767, 'auprc': 0.43729005597165554, 'eval_loss': 0.6926984134174529}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_3: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.0
>>>>>>>>>> Sigmoide F1-score Validation: 0.0
>>>>> Model_3: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_3: Métricas
              precision    recall  f1-score   support

    negativa       0.55      1.00      0.71       180
    positiva       0.00      0.00      0.00       149

    accuracy                           0.55       329
   macro avg       0.27      0.50      0.35       329
weighted avg       0.30      0.55      0.39       329

>>>>> Model_3: Salvando Métricas em JSON
>>>>> Model_3: Gerando matriz de confusão e salvando
>>>>> Model_3: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_3: Fim
Modelo: Model_4  | Batch Size: 16  | Learning Rate: 5e-05  | Épocas: 5


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_4: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_4: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.0, 'tp': 0, 'tn': 181, 'fp': 0, 'fn': 148, 'auroc': 0.5426870240406153, 'auprc': 0.47306231222773154, 'eval_loss': 0.6907337875593276}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_4: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.0
>>>>>>>>>> Sigmoide F1-score Validation: 0.0
>>>>> Model_4: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_4: Métricas
              precision    recall  f1-score   support

    negativa       0.55      1.00      0.71       180
    positiva       0.00      0.00      0.00       149

    accuracy                           0.55       329
   macro avg       0.27      0.50      0.35       329
weighted avg       0.30      0.55      0.39       329

>>>>> Model_4: Salvando Métricas em JSON
>>>>> Model_4: Gerando matriz de confusão e salvando
>>>>> Model_4: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_4: Fim
Modelo: Model_5  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 2


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_5: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_5: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.5229763220599102, 'tp': 121, 'tn': 128, 'fp': 53, 'fn': 27, 'auroc': 0.8280200089592356, 'auprc': 0.7873768036798972, 'eval_loss': 0.509589172899723}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_5: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7516
>>>>>>>>>> Sigmoide F1-score Validation: 0.7516
>>>>> Model_5: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_5: Métricas
              precision    recall  f1-score   support

    negativa       0.76      0.66      0.70       180
    positiva       0.64      0.74      0.69       149

    accuracy                           0.70       329
   macro avg       0.70      0.70      0.70       329
weighted avg       0.70      0.70      0.70       329

>>>>> Model_5: Salvando Métricas em JSON
>>>>> Model_5: Gerando matriz de confusão e salvando
>>>>> Model_5: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_5: Fim
Modelo: Model_6  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 3


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_6: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_6: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.5703496308887782, 'tp': 124, 'tn': 133, 'fp': 48, 'fn': 24, 'auroc': 0.8771464835000746, 'auprc': 0.8560822506147472, 'eval_loss': 0.526913615863859}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_6: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.775
>>>>>>>>>> Sigmoide F1-score Validation: 0.775
>>>>> Model_6: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_6: Métricas
              precision    recall  f1-score   support

    negativa       0.83      0.69      0.76       180
    positiva       0.69      0.83      0.76       149

    accuracy                           0.76       329
   macro avg       0.76      0.76      0.76       329
weighted avg       0.77      0.76      0.76       329

>>>>> Model_6: Salvando Métricas em JSON
>>>>> Model_6: Gerando matriz de confusão e salvando
>>>>> Model_6: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_6: Fim
Modelo: Model_7  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 4


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_7: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_7: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6370197721030828, 'tp': 125, 'tn': 144, 'fp': 37, 'fn': 23, 'auroc': 0.8959235478572496, 'auprc': 0.8704840319352534, 'eval_loss': 0.650083947050873}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_7: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.8065
>>>>>>>>>> Sigmoide F1-score Validation: 0.8065
>>>>> Model_7: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_7: Métricas
              precision    recall  f1-score   support

    negativa       0.88      0.79      0.83       180
    positiva       0.77      0.87      0.82       149

    accuracy                           0.83       329
   macro avg       0.83      0.83      0.83       329
weighted avg       0.83      0.83      0.83       329

>>>>> Model_7: Salvando Métricas em JSON
>>>>> Model_7: Gerando matriz de confusão e salvando
>>>>> Model_7: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_7: Fim
Modelo: Model_8  | Batch Size: 16  | Learning Rate: 3e-05  | Épocas: 5


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_8: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_8: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.7075324148597633, 'tp': 128, 'tn': 153, 'fp': 28, 'fn': 20, 'auroc': 0.9225399432581753, 'auprc': 0.9010652698319892, 'eval_loss': 0.6934945343721969}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_8: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.8421
>>>>>>>>>> Sigmoide F1-score Validation: 0.8421
>>>>> Model_8: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_8: Métricas
              precision    recall  f1-score   support

    negativa       0.88      0.86      0.87       180
    positiva       0.84      0.85      0.84       149

    accuracy                           0.86       329
   macro avg       0.86      0.86      0.86       329
weighted avg       0.86      0.86      0.86       329

>>>>> Model_8: Salvando Métricas em JSON
>>>>> Model_8: Gerando matriz de confusão e salvando
>>>>> Model_8: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_8: Fim
Modelo: Model_9  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 2


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_9: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_9: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.10264928457282582, 'tp': 92, 'tn': 87, 'fp': 94, 'fn': 56, 'auroc': 0.5488651635060475, 'auprc': 0.4893881746976051, 'eval_loss': 0.6932126900979451}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_9: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.5509
>>>>>>>>>> Sigmoide F1-score Validation: 0.5509
>>>>> Model_9: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_9: Métricas
              precision    recall  f1-score   support

    negativa       0.54      0.42      0.47       180
    positiva       0.44      0.56      0.49       149

    accuracy                           0.48       329
   macro avg       0.49      0.49      0.48       329
weighted avg       0.49      0.48      0.48       329

>>>>> Model_9: Salvando Métricas em JSON
>>>>> Model_9: Gerando matriz de confusão e salvando
>>>>> Model_9: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_9: Fim
Modelo: Model_10  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 3


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_10: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_10: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.5270299449879607, 'tp': 116, 'tn': 135, 'fp': 46, 'fn': 32, 'auroc': 0.8324996266985217, 'auprc': 0.7806382773965548, 'eval_loss': 0.5369084482746465}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_10: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7484
>>>>>>>>>> Sigmoide F1-score Validation: 0.7484
>>>>> Model_10: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_10: Métricas
              precision    recall  f1-score   support

    negativa       0.79      0.68      0.73       180
    positiva       0.67      0.79      0.72       149

    accuracy                           0.73       329
   macro avg       0.73      0.73      0.73       329
weighted avg       0.74      0.73      0.73       329

>>>>> Model_10: Salvando Métricas em JSON
>>>>> Model_10: Gerando matriz de confusão e salvando
>>>>> Model_10: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_10: Fim
Modelo: Model_11  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 4


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_11: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_11: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.5270299449879607, 'tp': 116, 'tn': 135, 'fp': 46, 'fn': 32, 'auroc': 0.8286919516201283, 'auprc': 0.7773558035735053, 'eval_loss': 0.6981936245269719}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_11: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7484
>>>>>>>>>> Sigmoide F1-score Validation: 0.7484
>>>>> Model_11: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_11: Métricas
              precision    recall  f1-score   support

    negativa       0.83      0.69      0.76       180
    positiva       0.69      0.83      0.75       149

    accuracy                           0.75       329
   macro avg       0.76      0.76      0.75       329
weighted avg       0.77      0.75      0.75       329

>>>>> Model_11: Salvando Métricas em JSON
>>>>> Model_11: Gerando matriz de confusão e salvando
>>>>> Model_11: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_11: Fim
Modelo: Model_12  | Batch Size: 16  | Learning Rate: 2e-05  | Épocas: 5


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_12: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_12: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6415762213187122, 'tp': 124, 'tn': 146, 'fp': 35, 'fn': 24, 'auroc': 0.9163431387188293, 'auprc': 0.8956077361733714, 'eval_loss': 0.5195270135910028}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_12: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.8078
>>>>>>>>>> Sigmoide F1-score Validation: 0.8078
>>>>> Model_12: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_12: Métricas
              precision    recall  f1-score   support

    negativa       0.85      0.83      0.84       180
    positiva       0.80      0.83      0.81       149

    accuracy                           0.83       329
   macro avg       0.83      0.83      0.83       329
weighted avg       0.83      0.83      0.83       329

>>>>> Model_12: Salvando Métricas em JSON
>>>>> Model_12: Gerando matriz de confusão e salvando
>>>>> Model_12: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_12: Fim
Modelo: Model_13  | Batch Size: 32  | Learning Rate: 5e-05  | Épocas: 2


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_13: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_13: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4814055306487305, 'tp': 97, 'tn': 148, 'fp': 33, 'fn': 51, 'auroc': 0.8263028221591757, 'auprc': 0.770860846311096, 'eval_loss': 0.5059400559181259}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_13: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.6978
>>>>>>>>>> Sigmoide F1-score Validation: 0.6978
>>>>> Model_13: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_13: Métricas
              precision    recall  f1-score   support

    negativa       0.74      0.78      0.76       180
    positiva       0.71      0.67      0.69       149

    accuracy                           0.73       329
   macro avg       0.73      0.72      0.73       329
weighted avg       0.73      0.73      0.73       329

>>>>> Model_13: Salvando Métricas em JSON
>>>>> Model_13: Gerando matriz de confusão e salvando
>>>>> Model_13: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_13: Fim
Modelo: Model_14  | Batch Size: 32  | Learning Rate: 5e-05  | Épocas: 3


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_14: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_14: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6816673884006055, 'tp': 124, 'tn': 153, 'fp': 28, 'fn': 24, 'auroc': 0.9219426608929372, 'auprc': 0.9035145559057656, 'eval_loss': 0.3776442083707523}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_14: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.8267
>>>>>>>>>> Sigmoide F1-score Validation: 0.8267
>>>>> Model_14: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_14: Métricas
              precision    recall  f1-score   support

    negativa       0.84      0.84      0.84       180
    positiva       0.81      0.81      0.81       149

    accuracy                           0.83       329
   macro avg       0.83      0.82      0.83       329
weighted avg       0.83      0.83      0.83       329

>>>>> Model_14: Salvando Métricas em JSON
>>>>> Model_14: Gerando matriz de confusão e salvando
>>>>> Model_14: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_14: Fim
Modelo: Model_15  | Batch Size: 32  | Learning Rate: 5e-05  | Épocas: 4


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_15: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_15: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6925763999004472, 'tp': 130, 'tn': 148, 'fp': 33, 'fn': 18, 'auroc': 0.9230065701060176, 'auprc': 0.8939295995250092, 'eval_loss': 0.5043992509544339}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_15: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.836
>>>>>>>>>> Sigmoide F1-score Validation: 0.836
>>>>> Model_15: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_15: Métricas
              precision    recall  f1-score   support

    negativa       0.91      0.84      0.87       180
    positiva       0.82      0.90      0.86       149

    accuracy                           0.87       329
   macro avg       0.87      0.87      0.87       329
weighted avg       0.87      0.87      0.87       329

>>>>> Model_15: Salvando Métricas em JSON
>>>>> Model_15: Gerando matriz de confusão e salvando
>>>>> Model_15: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_15: Fim
Modelo: Model_16  | Batch Size: 32  | Learning Rate: 5e-05  | Épocas: 5


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_16: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_16: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.7075324148597633, 'tp': 128, 'tn': 153, 'fp': 28, 'fn': 20, 'auroc': 0.9233238763625504, 'auprc': 0.9089828669876028, 'eval_loss': 0.5155961722678816}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_16: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.8421
>>>>>>>>>> Sigmoide F1-score Validation: 0.8421
>>>>> Model_16: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_16: Métricas
              precision    recall  f1-score   support

    negativa       0.87      0.82      0.84       180
    positiva       0.79      0.85      0.82       149

    accuracy                           0.83       329
   macro avg       0.83      0.83      0.83       329
weighted avg       0.84      0.83      0.83       329

>>>>> Model_16: Salvando Métricas em JSON
>>>>> Model_16: Gerando matriz de confusão e salvando
>>>>> Model_16: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_16: Fim
Modelo: Model_17  | Batch Size: 32  | Learning Rate: 3e-05  | Épocas: 2


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_17: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_17: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4016317571525748, 'tp': 103, 'tn': 128, 'fp': 53, 'fn': 45, 'auroc': 0.7813573241750037, 'auprc': 0.7286377356612841, 'eval_loss': 0.5568996987172535}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_17: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.6776
>>>>>>>>>> Sigmoide F1-score Validation: 0.6776
>>>>> Model_17: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_17: Métricas
              precision    recall  f1-score   support

    negativa       0.70      0.59      0.64       180
    positiva       0.58      0.69      0.63       149

    accuracy                           0.64       329
   macro avg       0.64      0.64      0.64       329
weighted avg       0.65      0.64      0.64       329

>>>>> Model_17: Salvando Métricas em JSON
>>>>> Model_17: Gerando matriz de confusão e salvando
>>>>> Model_17: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_17: Fim
Modelo: Model_18  | Batch Size: 32  | Learning Rate: 3e-05  | Épocas: 3


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_18: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_18: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.5012956484923293, 'tp': 105, 'tn': 143, 'fp': 38, 'fn': 43, 'auroc': 0.8353553830073167, 'auprc': 0.7838410792514827, 'eval_loss': 0.49795835394234883}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_18: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7216
>>>>>>>>>> Sigmoide F1-score Validation: 0.7216
>>>>> Model_18: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_18: Métricas
              precision    recall  f1-score   support

    negativa       0.76      0.72      0.74       180
    positiva       0.69      0.73      0.71       149

    accuracy                           0.73       329
   macro avg       0.73      0.73      0.73       329
weighted avg       0.73      0.73      0.73       329

>>>>> Model_18: Salvando Métricas em JSON
>>>>> Model_18: Gerando matriz de confusão e salvando
>>>>> Model_18: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_18: Fim
Modelo: Model_19  | Batch Size: 32  | Learning Rate: 3e-05  | Épocas: 4


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_19: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_19: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6039608477286385, 'tp': 120, 'tn': 144, 'fp': 37, 'fn': 28, 'auroc': 0.8920785426310288, 'auprc': 0.8581731839320396, 'eval_loss': 0.47178200560267125}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_19: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7869
>>>>>>>>>> Sigmoide F1-score Validation: 0.7869
>>>>> Model_19: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_19: Métricas
              precision    recall  f1-score   support

    negativa       0.84      0.76      0.80       180
    positiva       0.74      0.83      0.78       149

    accuracy                           0.79       329
   macro avg       0.79      0.79      0.79       329
weighted avg       0.80      0.79      0.79       329

>>>>> Model_19: Salvando Métricas em JSON
>>>>> Model_19: Gerando matriz de confusão e salvando
>>>>> Model_19: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_19: Fim
Modelo: Model_20  | Batch Size: 32  | Learning Rate: 3e-05  | Épocas: 5


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_20: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_20: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6736830478081235, 'tp': 128, 'tn': 147, 'fp': 34, 'fn': 20, 'auroc': 0.9178363446319246, 'auprc': 0.8916155604823698, 'eval_loss': 0.4858807392662302}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_20: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.8258
>>>>>>>>>> Sigmoide F1-score Validation: 0.8258
>>>>> Model_20: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_20: Métricas
              precision    recall  f1-score   support

    negativa       0.88      0.79      0.83       180
    positiva       0.78      0.87      0.82       149

    accuracy                           0.83       329
   macro avg       0.83      0.83      0.83       329
weighted avg       0.83      0.83      0.83       329

>>>>> Model_20: Salvando Métricas em JSON
>>>>> Model_20: Gerando matriz de confusão e salvando
>>>>> Model_20: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_20: Fim
Modelo: Model_21  | Batch Size: 32  | Learning Rate: 2e-05  | Épocas: 2


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_21: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_21: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.3760950159851769, 'tp': 109, 'tn': 116, 'fp': 65, 'fn': 39, 'auroc': 0.769075705539794, 'auprc': 0.7092535468918246, 'eval_loss': 0.5772509028514227}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_21: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.677
>>>>>>>>>> Sigmoide F1-score Validation: 0.677
>>>>> Model_21: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_21: Métricas
              precision    recall  f1-score   support

    negativa       0.70      0.55      0.62       180
    positiva       0.57      0.72      0.64       149

    accuracy                           0.63       329
   macro avg       0.64      0.63      0.63       329
weighted avg       0.64      0.63      0.63       329

>>>>> Model_21: Salvando Métricas em JSON
>>>>> Model_21: Gerando matriz de confusão e salvando
>>>>> Model_21: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_21: Fim
Modelo: Model_22  | Batch Size: 32  | Learning Rate: 2e-05  | Épocas: 3


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_22: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_22: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.42709381764314797, 'tp': 106, 'tn': 129, 'fp': 52, 'fn': 42, 'auroc': 0.8100268777064357, 'auprc': 0.7569072142314055, 'eval_loss': 0.5364345415007501}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_22: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.6928
>>>>>>>>>> Sigmoide F1-score Validation: 0.6928
>>>>> Model_22: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_22: Métricas
              precision    recall  f1-score   support

    negativa       0.75      0.71      0.73       180
    positiva       0.67      0.72      0.69       149

    accuracy                           0.71       329
   macro avg       0.71      0.71      0.71       329
weighted avg       0.72      0.71      0.71       329

>>>>> Model_22: Salvando Métricas em JSON
>>>>> Model_22: Gerando matriz de confusão e salvando
>>>>> Model_22: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_22: Fim
Modelo: Model_23  | Batch Size: 32  | Learning Rate: 2e-05  | Épocas: 4


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_23: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_23: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4969552259751897, 'tp': 118, 'tn': 127, 'fp': 54, 'fn': 30, 'auroc': 0.8268254442287591, 'auprc': 0.7839758107699617, 'eval_loss': 0.5615500050286452}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_23: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7375
>>>>>>>>>> Sigmoide F1-score Validation: 0.7375
>>>>> Model_23: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_23: Métricas
              precision    recall  f1-score   support

    negativa       0.77      0.68      0.72       180
    positiva       0.66      0.75      0.70       149

    accuracy                           0.71       329
   macro avg       0.71      0.71      0.71       329
weighted avg       0.72      0.71      0.71       329

>>>>> Model_23: Salvando Métricas em JSON
>>>>> Model_23: Gerando matriz de confusão e salvando
>>>>> Model_23: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_23: Fim
Modelo: Model_24  | Batch Size: 32  | Learning Rate: 2e-05  | Épocas: 5


Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

>>>>> Model_24: Treinando modelo


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_24: Validando com dados de Validação


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.5715791238418152, 'tp': 121, 'tn': 137, 'fp': 44, 'fn': 27, 'auroc': 0.8641555920561446, 'auprc': 0.8317049612413894, 'eval_loss': 0.5162001549975858}
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


>>>>> Model_24: Calculando F-Score de Validação
>>>>>>>>>> Arg-Max F1-score Validation: 0.7732
>>>>>>>>>> Sigmoide F1-score Validation: 0.7732
>>>>> Model_24: Predizendo valores


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

>>>>> Model_24: Métricas
              precision    recall  f1-score   support

    negativa       0.79      0.72      0.75       180
    positiva       0.69      0.77      0.73       149

    accuracy                           0.74       329
   macro avg       0.74      0.74      0.74       329
weighted avg       0.74      0.74      0.74       329

>>>>> Model_24: Salvando Métricas em JSON
>>>>> Model_24: Gerando matriz de confusão e salvando
>>>>> Model_24: Salvando Arquivo com predições de Dados de Teste
>>>>>>>>>>>>>>> Model_24: Fim


In [None]:
# Write every data split to disk as a semicolon-separated CSV using the
# 'utf-8-sig' codec (UTF-8 prefixed with a byte-order mark).
for split_df, split_path in (
    (train_df, file_train_dataset_name),
    (valid_df, file_validation_dataset_name),
    (test_df, file_test_dataset_name),
):
    split_df.to_csv(split_path, sep=';', encoding='utf-8-sig')

# Hand each collected artifact to Colab's download helper, one file at a time.
for file_name in arr_file_names:
    files.download(file_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Show the network held in `model.model`; as the cell's last expression, the
# notebook renders its repr (a BertForSequenceClassification module tree).
# NOTE(review): `model` is not defined in this cell — presumably the last model
# left over from the training loop above; confirm before relying on it.
model.model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(29794, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,), eps=1

In [None]:
# Dump the textual description of the network (the string form of
# `model.model`) to a plain-text file and pass it to Colab's download helper.
architecture_file_name = "architecture_model.txt"

# A context manager guarantees the handle is flushed and closed even if
# building or writing the string raises; the encoding is pinned so the
# result does not depend on the runtime's default locale.
with open(architecture_file_name, "w", encoding="utf-8") as text_file:
    text_file.write(str(model.model))

# Download only after the `with` block has closed the file, so the complete
# content is on disk.
files.download(architecture_file_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>