<a href="https://colab.research.google.com/github/Cirenata/BERTimbau-PT-BR-SC/blob/main/CSPortugu%C3%AAs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install transformers
!pip install ipython-autotime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 29.8 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 53.2 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 13.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 66.9 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninsta

In [3]:
from torch import optim
from torch.optim import Adam
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel

import math
import numpy as np
import pandas as pd
import statistics
import torch
import torch.nn as nn
import torch.nn.functional as F

In [4]:
%load_ext autotime

time: 406 µs (started: 2022-06-27 12:39:54 +00:00)


In [5]:
##### classes e funções auxiliares

# classe do classificador 
class TextClassifier(nn.Module):

  def __init__(self, num_classes = 2):
    super(TextClassifier, self).__init__()

    self.model = AutoModel.from_pretrained('neuralmind/bert-base-portuguese-cased')
    self.hidden = nn.Linear(768, 100)    
    self.classes = nn.Linear(100, num_classes)

    

  def forward(self, input_id, mask):
    _, pooled_output = self.model(input_ids = input_id, attention_mask = mask, return_dict = False)    
    x = F.relu(self.hidden(pooled_output))
    x = F.log_softmax(self.classes(x), dim = 1)
    
    return x


# classe do Dataset, para carregar as informações
# necessário dar margem senão dá erro no pytorch. Colocar padding para que todos os vetores sejam do mesmo tamanho
class Dataset(torch.utils.data.Dataset):

  def __init__(self, df, collabels, coltext):
    # mapeando os valores da coluna de rótulos no dataframe para valores numéricos
    uniques = dict()
    anc = df[collabels].unique()
    anc.sort()
    for i, val in enumerate(anc):
      uniques[val] = i
    self.uniques = uniques
    self.labels = [uniques[label] for label in df[collabels]]
    self.texts = [tokenizer(text, padding = 'max_length', max_length = 512,
                            truncation = True, return_tensors = 'pt') for text in df[coltext]]

  def classes(self):
    return self.labels

  
  def __len__(self):
    return len(self.labels)

  
  def get_batch_labels(self, idx):
    #fetch a batch of labels
    return np.array(self.labels[idx])


  def get_batch_texts(self, idx):
    #fetch a batch of inputs
    return self.texts[idx]


  def __getitem__(self, idx):    
    batch_texts = self.get_batch_texts(idx)
    batch_y = self.get_batch_labels(idx)

    return batch_texts, batch_y

# loop de treino
def train_test(model, df, collabels, coltext, learning_rate = 0.001, epochs = 2, split = [80, 20], balance = True, save = False):
  print(f'\nTraining\
          \nTraining Label Column: {collabels}\
          \nLearning Rate: {learning_rate}\
          \nEpochs: {epochs}\n')

  # estabelecendo o número de classes
  num_classes = len(df[collabels].unique())  
  
  # Funções de perda e otimização
  criterion = nn.CrossEntropyLoss()
  optimizer = Adam(model.parameters(), lr = learning_rate, eps = 1e-7)
  
  # Avaliar o uso de GPU ou CPU, a ser usado com 'to(device)' para a conversão
  use_cuda = torch.cuda.is_available()
  device = torch.device('cuda' if use_cuda else 'cpu')  
  if use_cuda:
    model = model.cuda()
    criterion = criterion.cuda()
  
  # definição do tipo de validação:  
  if type(split) is list and sum(split) == 100:
    if len(split) == 2: 
      val_type = 0          # val_type = 0 - lista de tamanho 2 - Apenas treino e teste
      print(f'Train/ Test training.')
    elif len(split) == 3:
      val_type = 1          # val_type = 1 - lista de tamanho 3 - Treino/ Validação / Teste
      print(f'Train/ Validation/ Test training.')
  elif type(split) is int and split < df.shape[0]/4:
    val_type = 2            # val_type = 2 - inteiro - K-fold Cross-Validation
    print('K-fold Cross-Validation training.')
  else:
    raise ValueError('The split value should be an integer (for K-Fold Cross-Validation),'\
                     'The integer must be less than 1/4 the dataset size (fold size at least 4)'\
                     'a list of length 2 (for train/test) or a list of length 3 (for train/validation/test)'\
                     'Split values list should reflect percentage, summing up 100')

  # Criando um df para cada valor de rótulo. Serão proporcionalmente concatenados conforme a distribuição do split,
  # a fim de manter a proporção. O dataloader se encarregará de embaralhá-los mais à frente.
  subdf = dict()
  sizes = dict()
  for key in df[collabels].unique():
    subdf[key] = df[df[collabels] == key]
    sizes[key] = len(subdf[key])
  # balanceamento pelo método da redução ao tamanho da menor classe
  if balance:
    for key in subdf.keys():
      subdf[key] = subdf[key].iloc[:min(sizes.values())]
      sizes[key] = len(subdf[key])
  del df
  
  # preparando o split do dataset 
  # lista placeholder para as divisões de treino (, validação) e teste
  folds = []       
   
  marks = [0]    
  for mark in (range(split) if val_type == 2 else split):
    # lista dos marcos de início e fim das divisões em split, para int e para list
    marks.append(marks[-1] + (round(100/split, 5) if val_type == 2 else mark ))
  for i in (range(split) if val_type == 2 else range(len(split))):
    # lista em que vamos juntar os df dos folds de cada valor de rótulo
    join = []
    # para cada df rótulo
    for key in subdf.keys():
      # anexar um daquele rótulo
      join.append(
          subdf[key].iloc[
                          # do começo daquela divisão, proporcional ao tamanho do df de rótulo
                          math.floor(marks[i]/100*sizes[key])
                          : # até
                          # o fim daquela divisão, proporcional ao tamanho do df de rótulo
                          math.ceil(marks[i+1]/100*sizes[key])
                          ]
          )
    # com um df de rótulos de cada tipo, com tamanho proporcional ao fold, podemos concatená-los em um df
    join = pd.concat(join) 
    folds.append(join)
 
  del subdf, sizes    

  ### treino
  print('Starting training')
  for turn_num in range(split if val_type == 2 else 1):
    print(f'Round: {turn_num + 1 if val_type == 2 else 1} of {split if val_type == 2 else 1}')
    # O conjunto de teste é o último da lista folds
    test_index = -1
    
    # se a validação for kfold, o conjunto de teste varia por turno
    if val_type == 2:
      test_index = turn_num
    test_data = folds.pop(test_index)    
    
    # se for Train/ Validation/ Test validation, retirar os dados de validação
    if val_type == 1:
      val_data = folds.pop(-1)
    
    # carregando os dados de treino (o restante), datasets e dataloaders
    folds_datasets = []
    for fold in folds:  
      folds_datasets.append( 
          Dataset(fold, collabels, coltext)    
      )
    train = torch.utils.data.ConcatDataset(folds_datasets)    
    train_dataloader = torch.utils.data.DataLoader(train, batch_size = 2, shuffle = True)
    

    for epoch_num in range(epochs):
      total_acc_train = 0
      total_loss_train = 0

      for train_input, train_label in tqdm(train_dataloader):        
        train_label = train_label.to(device)
        
        mask = train_input['attention_mask'].to(device)
        input_id = train_input['input_ids'].squeeze(1).to(device)

        output = model(input_id, mask)

        batch_loss = criterion(output, train_label)
        total_loss_train += batch_loss.item()

        acc = (output.argmax(dim = 1) == train_label).sum().item()
        total_acc_train += acc

        model.zero_grad()
        batch_loss.backward()
        optimizer.step()
      # validação da época no modo Train/Validation/Test
      sitrep = f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train): .3f} \
            | Train Accuracy: {total_acc_train / len(train): .3f}' 
      if val_type == 1:
        val_metrics = evaluate(model, val_data, collabels, coltext, criterion = criterion)
        sitrep += f'| Val Loss: {val_metrics["Loss"] / len(val_data): .3f if val_type == 1 else ""} \
            | Val Accuracy: {val_metrics["Accuracy"]: .3f if val_type == 1 else ""}'
      print(sitrep)        
    # testando com o set de teste
    metrics = evaluate(model, test_data, collabels, coltext)
    if val_type == 2:      
      # inicializando o dicionário de métricas acumuladas
      if turn_num == 0:
        acc_metrics = dict()
        for key in metrics.keys():
          acc_metrics[key] = []
      for key in metrics.keys():
        acc_metrics[key].append(metrics[key])
      if metrics['Accuracy'] == max(acc_metrics['Accuracy']) and save == True:
        save_model(model, name= f'{split}fold-CS-{collabels.replace("/","-")}-LR{learning_rate}-Epochs{epochs}')
      #model.zero_grad()
      #optimizer.zero_grad()      
      model = TextClassifier(num_classes = num_classes)
      optimizer = Adam(model.parameters(), lr = learning_rate, eps = 1e-7)
      if use_cuda:
        model = model.cuda()
      #repondo o fold de treino para reiniciar o k-fold
      folds.insert(test_index, test_data)
  if val_type == 2:
    for key in acc_metrics.keys():
      print(f'Average {key}: {statistics.mean(acc_metrics[key])}')
    print(f'Best Accuracy (saved model): { max(acc_metrics["Accuracy"])}')

#função de avaliação
def evaluate(model, data, collabels, coltext, criterion = None):
  print(f'Testing data for {collabels} labels')
  test = Dataset(data, collabels, coltext)
  test_dataloader = torch.utils.data.DataLoader(test, batch_size = 1)

  use_cuda = torch.cuda.is_available()
  device = torch.device('cuda' if use_cuda else 'cpu')

  if use_cuda:
    model = model.cuda()  
  actuls = dict(data[collabels].value_counts())
  trus = dict()         # Armazena predições corretas
  fals = dict()         # Armazena predições incorretas
  inds = dict()      # Armazena a classe correspondente a cada índice (em output.argmax)
  uniques = test.uniques
  for key in uniques.keys():
    trus[key] = 0
    fals[key] = 0
    inds[uniques[key]] = key
  eval_loss = 0

  with torch.no_grad():
    for test_input, test_label in tqdm(test_dataloader):
      test_label = test_label.to(device)
      mask = test_input['attention_mask'].to(device)
      input_id = test_input['input_ids'].squeeze(1).to(device)

      # aqui model retorna a saída da rede neural, com 1 dimensão para cada classe.
      output = model(input_id, mask)

      if criterion:
        batch_loss = criterion (output, test_label)
        eval_loss += batch_loss.item()

      pred = output.argmax(dim = 1).item()
      # Se a predição for correta, soma à classe em trus. Senão, em fals
      if pred == test_label:
        trus[inds[pred]] += 1
      else:
        fals[inds[pred]] += 1
  # dicionário de métricas
  print('\n')
  metrics = {
        'Accuracy' : sum(trus.values())/(len(test)),          
    }
  for key in trus.keys():
    try:
      metrics[f'Precision for "{key}"'] = trus[key]/(trus[key] + fals[key])
    except ZeroDivisionError:
      print(f"Couldn't calculate Precision for '{key}'")
    try:
      metrics[f'Recall for "{key}"'] = trus[key]/actuls[key]
    except ZeroDivisionError:
      print(f"Couldn't calculate Recall for '{key}'")
    try:
      metrics[f'F1 for "{key}"'] = 2 * (metrics[f'Precision for "{key}"'] * metrics[f'Recall for "{key}"']) / (metrics[f'Precision for "{key}"'] + metrics[f'Recall for "{key}"'])
    except (ZeroDivisionError, KeyError):
      print(f"Couldn't calculate F1 for '{key}'")  
  if criterion:
    metrics['Loss'] = eval_loss    
  else:
    for metric in metrics.keys():
      print(f'{metric:=<30}: {metrics[metric]: .3f}')
  return metrics
    

                      
def save_model(model, name = 'TextClassifier'):
  path = f'./drive/MyDrive/Pesquisa/{name}.pt'
  torch.save(model.state_dict(), path )
  print(f'Saved file as {path}') 

def load_model(name = 'TextCLassifier'):
  path = f'./drive/MyDrive/Pesquisa/{name}.pt'
  model = TextClassifier()
  model.load_state_dict(torch.load(path))
  model.eval()
  print(f'Loaded file {name} from folder {path}')
  return model

time: 691 ms (started: 2022-06-27 12:39:54 +00:00)


# Training Settings

It's possible to choose the corpus (preprocessed or not) to be trained. Change the key for the 'corpora' dictionary in the 'end' variable.

* 'books' : Books reviews corpus (350 entries)
* 'electronics' : electronics products reviews corpus (233 entries)
* 'computer' : Computer-BR corpus
* 'books-preproc' : Books reviews corpus, preprocessed using Belisario et al. code's 'preprocess' function 
* 'electronics-preproc' : electronics products reviews corpus, preprocessed using Belisario et al. code's 'preprocess' function  
* 'computer-preproc' : Computer-BR corpus, preprocessed using Belisario et al. code's 'preprocess' function  

In [6]:
# carregar os tokenizadores e os modeladores para empregar nas classes de RNA
tokenizer = AutoTokenizer.from_pretrained('neuralmind/bert-base-portuguese-cased')

# rótulos das colunas utilizadas dos dataframes
labels_col = 'OBJ/SUBJ'
text_col = 'FRASE'
corpora = {
    'books' : 'https://raw.githubusercontent.com/Cirenata/BERTimbau-PT-BR-SC-no-preproc/main/corpus_book_reviews_portuguese.csv',
    'electronics' : 'https://raw.githubusercontent.com/Cirenata/BERTimbau-PT-BR-SC-no-preproc/main/Subjectivity-annotated_corpus_on_electronic_product_domain-anotacao-BELISARIO.CSV',
    'computer' : 'https://raw.githubusercontent.com/Cirenata/BERTimbau-PT-BR-SC/main/Computer-BR-raw.csv',
    'books-preproc' : 'https://raw.githubusercontent.com/Cirenata/BERTimbau-PT-BR-SC/main/corpus_book_reviews_portuguese_preproc.csv',
    'electronics-preproc' : 'https://raw.githubusercontent.com/Cirenata/BERTimbau-PT-BR-SC/main/Subjectivity-annotated_corpus_on_electronic_product_domain-anotacao-BELISARIO-preproc.CSV',
    'computer-preproc' : 'https://raw.githubusercontent.com/Cirenata/BERTimbau-PT-BR-SC/main/Computer-BR-preproc.csv' 

}
# colocando o caminho em uma variável e carregando o dataset a partir do CSV
end = corpora['books-preproc']
df = pd.read_csv(end, usecols = [text_col , labels_col]) 
print(df.head(5))


Downloading:   0%|          | 0.00/43.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/647 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/205k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

                                               FRASE   OBJ/SUBJ
0  notehbook da dell é um LIXO !!!!!!!!!!!!!!!!!!...  subjetiva
1  #OFERTHEC Notebook Dell Inspiron i14-3442-B10 ...   objetiva
2  To aqui olhando notebook da Dell e vi um de $1...  subjetiva
3  Semana do Cliente FNAC continua com ofertas de...   objetiva
4  Problemas c/ o teclado do seu notebook Dell? A...   objetiva
time: 16.8 s (started: 2022-06-27 12:39:55 +00:00)


In [7]:
# modelo. irá baixar o automodel para o BERT
model = TextClassifier()

Downloading:   0%|          | 0.00/418M [00:00<?, ?B/s]

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


time: 29.9 s (started: 2022-06-27 12:40:12 +00:00)


In [8]:
%timeit
# Treinando a rede neural

# parâmetros do treino
EPOCHS = 20
LR = 1e-6

# treinamento do modelo
train_test(model, df, labels_col, text_col, learning_rate = LR, epochs = EPOCHS, split = 3, balance = False)




Training          
Training Label Column: OBJ/SUBJ          
Learning Rate: 1e-06          
Epochs: 20

K-fold Cross-Validation training.
Starting training
Round: 1 of 3


100%|██████████| 774/774 [02:46<00:00,  4.64it/s]


Epochs: 1 | Train Loss:  0.298             | Train Accuracy:  0.720


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 2 | Train Loss:  0.234             | Train Accuracy:  0.751


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 3 | Train Loss:  0.187             | Train Accuracy:  0.849


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 4 | Train Loss:  0.146             | Train Accuracy:  0.897


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 5 | Train Loss:  0.111             | Train Accuracy:  0.932


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 6 | Train Loss:  0.079             | Train Accuracy:  0.960


100%|██████████| 774/774 [02:49<00:00,  4.56it/s]


Epochs: 7 | Train Loss:  0.052             | Train Accuracy:  0.980


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 8 | Train Loss:  0.033             | Train Accuracy:  0.990


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 9 | Train Loss:  0.022             | Train Accuracy:  0.994


100%|██████████| 774/774 [02:49<00:00,  4.56it/s]


Epochs: 10 | Train Loss:  0.015             | Train Accuracy:  0.997


100%|██████████| 774/774 [02:49<00:00,  4.55it/s]


Epochs: 11 | Train Loss:  0.010             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 12 | Train Loss:  0.006             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 13 | Train Loss:  0.004             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 14 | Train Loss:  0.002             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 15 | Train Loss:  0.002             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 16 | Train Loss:  0.001             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 17 | Train Loss:  0.001             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 18 | Train Loss:  0.000             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 19 | Train Loss:  0.000             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 20 | Train Loss:  0.000             | Train Accuracy:  1.000
Testing data for OBJ/SUBJ labels


100%|██████████| 773/773 [00:28<00:00, 27.41it/s]




Precision for "subjetiva"=====:  0.773


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Round: 2 of 3


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 1 | Train Loss:  0.291             | Train Accuracy:  0.725


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 2 | Train Loss:  0.233             | Train Accuracy:  0.749


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 3 | Train Loss:  0.176             | Train Accuracy:  0.858


100%|██████████| 774/774 [02:49<00:00,  4.56it/s]


Epochs: 4 | Train Loss:  0.123             | Train Accuracy:  0.927


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 5 | Train Loss:  0.074             | Train Accuracy:  0.972


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 6 | Train Loss:  0.043             | Train Accuracy:  0.989


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 7 | Train Loss:  0.026             | Train Accuracy:  0.995


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 8 | Train Loss:  0.016             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 9 | Train Loss:  0.012             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 10 | Train Loss:  0.010             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 11 | Train Loss:  0.009             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 12 | Train Loss:  0.008             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 13 | Train Loss:  0.007             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 14 | Train Loss:  0.007             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 15 | Train Loss:  0.006             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 16 | Train Loss:  0.005             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 17 | Train Loss:  0.006             | Train Accuracy:  0.997


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 18 | Train Loss:  0.003             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 19 | Train Loss:  0.003             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 20 | Train Loss:  0.002             | Train Accuracy:  0.999
Testing data for OBJ/SUBJ labels


100%|██████████| 774/774 [00:28<00:00, 27.35it/s]




Precision for "subjetiva"=====:  0.771


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Round: 3 of 3


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 1 | Train Loss:  0.266             | Train Accuracy:  0.740


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 2 | Train Loss:  0.182             | Train Accuracy:  0.851


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 3 | Train Loss:  0.135             | Train Accuracy:  0.898


100%|██████████| 774/774 [02:50<00:00,  4.54it/s]


Epochs: 4 | Train Loss:  0.099             | Train Accuracy:  0.941


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 5 | Train Loss:  0.068             | Train Accuracy:  0.966


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 6 | Train Loss:  0.046             | Train Accuracy:  0.981


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 7 | Train Loss:  0.030             | Train Accuracy:  0.990


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 8 | Train Loss:  0.019             | Train Accuracy:  0.995


100%|██████████| 774/774 [02:49<00:00,  4.55it/s]


Epochs: 9 | Train Loss:  0.013             | Train Accuracy:  0.997


100%|██████████| 774/774 [02:49<00:00,  4.55it/s]


Epochs: 10 | Train Loss:  0.009             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 11 | Train Loss:  0.007             | Train Accuracy:  0.998


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 12 | Train Loss:  0.007             | Train Accuracy:  0.997


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 13 | Train Loss:  0.004             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 14 | Train Loss:  0.003             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 15 | Train Loss:  0.003             | Train Accuracy:  0.999


100%|██████████| 774/774 [02:49<00:00,  4.55it/s]


Epochs: 16 | Train Loss:  0.001             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 17 | Train Loss:  0.001             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 18 | Train Loss:  0.001             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 19 | Train Loss:  0.000             | Train Accuracy:  1.000


100%|██████████| 774/774 [02:50<00:00,  4.55it/s]


Epochs: 20 | Train Loss:  0.000             | Train Accuracy:  1.000
Testing data for OBJ/SUBJ labels


100%|██████████| 774/774 [00:28<00:00, 27.39it/s]




Precision for "subjetiva"=====:  0.523


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Average Accuracy: 0.8212145928533305
Average Precision for "objetiva": 0.9088115802362869
Average Recall for "objetiva": 0.842215052389471
Average F1 for "objetiva": 0.8680684546969434
Average Precision for "subjetiva": 0.688980086944977
Average Recall for "subjetiva": 0.7663551401869159
Average F1 for "subjetiva": 0.7092020067679298
Best Accuracy (saved model): 0.8602846054333765
time: 2h 51min 53s (started: 2022-06-27 12:40:41 +00:00)


In [9]:
'''# Teste para 3 classes com 80/20
df2 = df
df2.iloc[118:234] = df2.iloc[118:234].replace(to_replace = ['objetiva', 'subjetiva'], value = ['indecisa']*2)
df2'''

"# Teste para 3 classes com 80/20\ndf2 = df\ndf2.iloc[118:234] = df2.iloc[118:234].replace(to_replace = ['objetiva', 'subjetiva'], value = ['indecisa']*2)\ndf2"

time: 9.18 ms (started: 2022-06-27 15:32:35 +00:00)


In [10]:
'''# modelo para treino com 3 classes
model2 = TextClassifier(num_classes = 3)
train_test(model2, df2, labels_col, text_col, learning_rate = 0.0001, epochs = 10 )'''

'# modelo para treino com 3 classes\nmodel2 = TextClassifier(num_classes = 3)\ntrain_test(model2, df2, labels_col, text_col, learning_rate = 0.0001, epochs = 10 )'

time: 6.54 ms (started: 2022-06-27 15:32:35 +00:00)
