# Single BERT Sample

> Reference tutorial: https://medium.com/towards-artificial-intelligence/text-classification-with-simple-transformers-a29d13358135



In [1]:
!pip install --upgrade transformers
!pip install simpletransformers

Requirement already up-to-date: transformers in /usr/local/lib/python3.6/dist-packages (4.2.2)


In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sklearn.metrics import classification_report, confusion_matrix
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
import wandb
import logging
from google.colab import files
import json
import gc
import time

In [3]:
# Keep the "transformers" logger down to warnings, while the root logger
# is configured to report INFO-level messages.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

In [31]:
def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + exp(-x)).

    Args:
        x: A scalar or array-like of real numbers (e.g. raw model logits).

    Returns:
        The element-wise sigmoid of ``x``: a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp is only ever evaluated on non-positive values, so it cannot
    # overflow no matter how large |x| is.
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0: e^x / (1 + e^x) (same value).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

def show_confusion_matrix(confusion_matrix, name_model):
    """Plot a confusion matrix as a heatmap, save it as a PNG and download it.

    The image is written to ``matriz_<name_model>.png`` in the working
    directory and then handed to ``files.download``.

    Args:
        confusion_matrix: Table of integer counts to plot (cells are
            annotated with the ``"d"`` format).
        name_model: Label used in the plot title and in the output file
            name; it is converted with ``str()``.
    """
    # Convert once so the title and the file name agree, and so that a
    # non-string label does not break the file-name concatenation.
    name_model = str(name_model)
    file_matriz_name = "matriz_" + name_model + ".png"

    # Draw on a dedicated figure instead of whatever pyplot considers current.
    fig, ax = plt.subplots()
    try:
        sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.yaxis.set_ticklabels(ax.yaxis.get_ticklabels(), rotation=0, ha='right')
        ax.xaxis.set_ticklabels(ax.xaxis.get_ticklabels(), rotation=30, ha='right')
        ax.set_title('Matriz de Confusão: ' + name_model)
        ax.set_ylabel('Classificação Real')
        ax.set_xlabel('Classificação Predita')
        fig.savefig(file_matriz_name)
        files.download(file_matriz_name)
    finally:
        # Release the figure even if saving or downloading fails.
        plt.close(fig)

In [30]:
# Run-wide settings read by the later cells.
class_names = ['negativa', 'positiva']  # display names for the two classes

SEED = 42             # manual seed given to the model arguments
EPOCHS = 4            # number of training epochs
BATCH_SIZE = 32       # training batch size
LEARNING_RATE = 5e-5  # optimizer learning rate

In [16]:
from google.colab import drive

# Mount Google Drive (forcing a remount) so the dataset files can be read.
drive.mount('/content/drive', force_remount=True)

file_path = "/content/drive/My Drive/"

# Read the three ';'-separated splits in the order train, test, validation,
# keeping only the 'sentence' and 'class' columns of each.
data_train, data_test, data_validation = (
    pd.read_csv(str(file_path) + csv_name, sep=';')[['sentence', 'class']]
    for csv_name in ("train_dataset.csv", "test_dataset.csv", "validation_dataset.csv")
)

Mounted at /content/drive


In [29]:
# Collect the training options in one place, fed from the notebook constants.
model_args = ClassificationArgs(
    manual_seed=SEED,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    train_batch_size=BATCH_SIZE,
    overwrite_output_dir=True,
    # NOTE(review): presumably these two turn off intermediate checkpoints;
    # confirm against the simpletransformers documentation.
    save_steps=-1,
    save_model_every_epoch=False,
)

# BERT classifier initialised from the Portuguese cased checkpoint, on GPU.
model = ClassificationModel(
    model_type='bert',
    model_name='neuralmind/bert-large-portuguese-cased',
    args=model_args,
    use_cuda=True,
)

Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from th

In [22]:
# simpletransformers looks for columns named "text" and "labels"; without
# them it falls back to column positions and emits a warning. Renaming makes
# the mapping explicit. rename() returns new frames, so data_train and
# data_validation keep their original column names for the later cells.
column_map = {'sentence': 'text', 'class': 'labels'}

model.train_model(train_df=data_train.rename(columns=column_map))
result, model_outputs, wrong_preds = model.eval_model(
    eval_df=data_validation.rename(columns=column_map)
)

# One row of raw scores per validation sample; the predicted class is the
# position of the highest score in each row.
model_outputs = np.asarray(model_outputs)
predictions = np.argmax(model_outputs, axis=1).tolist()

# The sigmoid is monotonic, so taking the argmax after applying it selects
# the same class as the argmax of the raw scores; it is kept so that both
# scores are still reported.
sigmoid_preds = np.argmax(sigmoid(model_outputs), axis=1)

print('>>>>>>>>>> Arg-Max F1-score Validation:', round(f1_score(data_validation['class'], predictions), 4))
print('>>>>>>>>>> Sigmoide F1-score Validation:', round(f1_score(data_validation['class'], sigmoid_preds), 4))

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/2630 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/83 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.
  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/329 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/42 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6803354979267836, 'tp': 121, 'tn': 156, 'fp': 25, 'fn': 27, 'auroc': 0.9191802299537107, 'auprc': 0.9045452599496874, 'eval_loss': 0.9437067111150446}


>>>>>>>>>> Arg-Max F1-score Validation: 0.8231
>>>>>>>>>> Sigmoide F1-score Validation: 0.8231


In [28]:
# Normalise the index without mutating the frame in place.
data_test = data_test.reset_index(drop=True)

# Hand predict() a plain list of sentences so the call does not depend on
# how the DataFrame happens to be indexed.
test_predictions, raw_outputs = model.predict(data_test['sentence'].tolist())

# Human-readable report for the notebook output.
print(classification_report(data_test['class'], test_predictions, target_names=class_names))

# Same report as a dictionary, extended with the headline F1 scores.
# `predictions` holds the validation predictions from the training cell.
report = classification_report(data_test['class'], test_predictions, target_names=class_names, output_dict=True)
report['F1_score_Test'] = round(f1_score(data_test['class'], test_predictions), 4)
report['F1_score_Validation'] = round(f1_score(data_validation['class'], predictions), 4)

# Write the metrics to disk and download the file.
file_metrics_name = "metrics_model_paper.json"
with open(file_metrics_name, 'w') as outfile:
    json.dump(report, outfile)
files.download(file_metrics_name)

INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.


  0%|          | 0/329 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

              precision    recall  f1-score   support

    negativa       0.88      0.90      0.89       180
    positiva       0.88      0.85      0.86       149

    accuracy                           0.88       329
   macro avg       0.88      0.87      0.87       329
weighted avg       0.88      0.88      0.88       329



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [25]:
# Count true vs. predicted classes on the test set and label both axes
# with the class names.
cnf_matrix = confusion_matrix(y_true=data_test['class'], y_pred=test_predictions)
df_cm = pd.DataFrame(data=cnf_matrix, index=class_names, columns=class_names)

# Plot, save and download the matrix under the "model_paper" label.
show_confusion_matrix(confusion_matrix=df_cm, name_model="model_paper")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
file_predictions_name = "predictions_model_paper.csv"

# Test sentences and true classes, with the predicted class alongside.
df_final = data_test[['sentence', 'class']].assign(predicted_class=test_predictions)

# Save as a ';'-separated CSV (UTF-8 with BOM) and download it.
df_final.to_csv(file_predictions_name, sep=';', encoding='utf-8-sig')
files.download(file_predictions_name)

print(">>>>>>>>>>>>>>> Fim")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

>>>>>>>>>>>>>>> Fim


In [27]:
# Print the full set of arguments held by the model, for the record.
print(model.args)

ClassificationArgs(adam_epsilon=1e-08, best_model_dir='outputs/best_model', cache_dir='cache_dir/', config={}, cosine_schedule_num_cycles=0.5, custom_layer_parameters=[], custom_parameter_groups=[], dataloader_num_workers=0, do_lower_case=False, dynamic_quantize=False, early_stopping_consider_epochs=False, early_stopping_delta=0, early_stopping_metric='eval_loss', early_stopping_metric_minimize=True, early_stopping_patience=3, encoding=None, adafactor_eps=(1e-30, 0.001), adafactor_clip_threshold=1.0, adafactor_decay_rate=-0.8, adafactor_beta1=None, adafactor_scale_parameter=True, adafactor_relative_step=True, adafactor_warmup_init=True, eval_batch_size=8, evaluate_during_training=False, evaluate_during_training_silent=True, evaluate_during_training_steps=2000, evaluate_during_training_verbose=False, evaluate_each_epoch=True, fp16=True, gradient_accumulation_steps=1, learning_rate=5e-05, local_rank=-1, logging_steps=50, manual_seed=42, max_grad_norm=1.0, max_seq_length=128, model_name='