In [None]:
# Transformers installation
! pip install transformers datasets
!pip install evaluate
!pip install deepspeed
!pip install accelerate
!pip install wandb
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+https://github.com/huggingface/transformers.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.8.0-py3-none-any.whl (452 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m452.9/452.9 KB[0m [31m48.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.4/182.4 KB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m96.9 MB/s[0m eta [36m0:00:00[0

In [None]:
import os

# Environment for a single-process run: rank 0 (local rank 0) of a world of
# size 1, with the rendezvous address on this machine.
os.environ.update(
    {
        "MASTER_ADDR": "localhost",
        "MASTER_PORT": "9994",  # modify if RuntimeError: Address already in use
        "RANK": "0",
        "LOCAL_RANK": "0",
        "WORLD_SIZE": "1",
    }
)

# Fine-tune a pretrained model

There are significant benefits to using a pretrained model. It reduces computation costs and your carbon footprint, and allows you to use state-of-the-art models without having to train one from scratch. 🤗 Transformers provides access to thousands of pretrained models for a wide range of tasks. When you use a pretrained model, you train it on a dataset specific to your task. This is known as fine-tuning, an incredibly powerful training technique. In this tutorial, you will fine-tune a pretrained model with a deep learning framework of your choice:

* Fine-tune a pretrained model with 🤗 Transformers [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer).
* Fine-tune a pretrained model in TensorFlow with Keras.
* Fine-tune a pretrained model in native PyTorch.

<a id='data-processing'></a>

## Prepare a dataset

Before you can fine-tune a pretrained model, download a dataset and prepare it for training. The previous tutorial showed you how to process data for training, and now you get an opportunity to put those skills to the test!

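Begin by loading the [NILC punctuation](https://huggingface.co/datasets/tiagoblima/nilc-punctuation-explainable) dataset, in which each Portuguese sentence is labelled as having `correct` or `incorrect` punctuation: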

In [None]:
from huggingface_hub import notebook_login
# Authenticate with the Hugging Face Hub from inside the notebook
# (the training arguments further down set `push_to_hub=True`).
notebook_login()

Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.huggingface/token
Login successful


In [None]:
from datasets import load_dataset

# Load the dataset from the Hugging Face Hub by its repository id.
dataset = load_dataset('tiagoblima/nilc-punctuation-explainable')
# Peek at one training example to see which fields are available.
dataset["train"][100]



  0%|          | 0/1 [00:00<?, ?it/s]

{'text': 'quem vai dizer se isso funciona ou não são os futuros músicos.',
 'label': 'correct',
 'reference': 'Quem vai dizer se isso funciona OU não são OS futuros músicos.',
 '__index_level_0__': 107}

As you now know, you need a tokenizer to process the text and include a padding and truncation strategy to handle any variable sequence lengths. To process your dataset in one step, use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/process.html#map) method to apply a preprocessing function over the entire dataset:

In [None]:
from transformers import AutoTokenizer
# Pretrained checkpoint to fine-tune.
# Alternative checkpoint: "tiagoblima/punctuation-nilc-bert-large"
checkpoint = "neuralmind/bert-base-portuguese-cased"
max_length = 512  # every sequence is padded to / truncated at this many tokens

tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Each sequence is padded to, and truncated at, ``max_length`` tokens.
    """
    encoded = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    return encoded


tokenized_datasets = dataset.map(tokenize_function, batched=True)

  0%|          | 0/2 [00:00<?, ?ba/s]

In [None]:

# Build the label vocabulary in sorted order. Iterating a bare `set` of strings
# can yield a different order on every kernel start (string hash randomization),
# which would silently change which integer id each label receives.
id2label = dict(enumerate(sorted(set(dataset['train']['label']))))
label2id = {label: idx for idx, label in id2label.items()}


def map_label(batch):
    """Replace the string label of a single example with its integer id.

    The example is modified in place and returned; a label missing from
    ``label2id`` raises ``KeyError``.
    """
    batch["label"] = label2id[batch['label']]
    return batch


tokenized_datasets = tokenized_datasets.map(map_label)
tokenized_datasets

  0%|          | 0/1202 [00:00<?, ?ex/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'reference', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1202
    })
})

In [None]:
# Show the id -> label-name mapping built from the training labels.
id2label

{0: 'correct', 1: 'incorrect'}

In [None]:
# Index the row first, then the column: this reads a single example instead of
# materializing the whole `input_ids` column just to look at four token ids.
tokenized_datasets['train'][5]['input_ids'][:4]

[101, 1790, 2226, 22287]

In [None]:
# Hold out 10% of the examples for evaluation. A fixed seed makes the split
# (and therefore the reported metrics) reproducible across runs.
tokenized_datasets = tokenized_datasets['train'].train_test_split(test_size=0.1, seed=42)

Keep a handle on each of the two splits: the training portion, and the held-out 10% that is used for evaluation:

In [None]:
# Separate names for the training split and the held-out evaluation split.
train_dataset, eval_dataset = tokenized_datasets['train'], tokenized_datasets['test']

In [None]:
# Display the id -> label-name mapping again.
id2label

{0: 'correct', 1: 'incorrect'}

In [None]:
# Integer label column of each split.
train_labels, eval_labels = train_dataset['label'], eval_dataset['label']


def convert_ids(item):
    """Turn a ``(label_id, value)`` pair into ``(label_name, value)`` using ``id2label``."""
    return id2label[item[0]], item[1]


In [None]:
from collections import Counter

import pandas as pd

# Per-split label frequencies, keyed by label name rather than by label id.
stats = {
    split: dict(map(convert_ids, Counter(labels).items()))
    for split, labels in (('train', train_labels), ('test', eval_labels))
}

# Transpose so that each split becomes a column and each label a row.
stats_df = pd.DataFrame.from_dict(stats, orient='index').T
stats_df

Unnamed: 0,train,test
incorrect,544,57
correct,537,64


In [None]:
# With no path argument, `to_csv` returns the CSV text instead of writing a file.
stats_df.to_csv()

',train,test\nincorrect,544,57\ncorrect,537,64\n'

<a id='trainer'></a>

## Train

At this point, you should follow the section corresponding to the framework you want to use. You can use the links
in the right sidebar to jump to the one you want - and if you want to hide all of the content for a given framework,
just use the button at the top-right of that framework's block!

## Train with PyTorch Trainer

🤗 Transformers provides a [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) class optimized for training 🤗 Transformers models, making it easier to start training without manually writing your own training loop. The [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) API supports a wide range of training options and features such as logging, gradient accumulation, and mixed precision.

Start by loading your model and specify the number of expected labels. This dataset has two labels (`correct` and `incorrect`), so `num_labels` is taken from the size of `label2id`:

In [None]:
from transformers import AutoModelForSequenceClassification

# Classification head sized to the label vocabulary; the id <-> label maps are
# passed along so they are stored with the model configuration.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the

<Tip>

You will see a warning about some of the pretrained weights not being used and some weights being randomly
initialized. Don't worry, this is completely normal! The pretrained head of the BERT model is discarded, and replaced with a randomly initialized classification head. You will fine-tune this new model head on your sequence classification task, transferring the knowledge of the pretrained model to it.

</Tip>

### Training hyperparameters

Next, create a [TrainingArguments](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments) class which contains all the hyperparameters you can tune as well as flags for activating different training options. For this tutorial you can start with the default training [hyperparameters](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments), but feel free to experiment with these to find your optimal settings.

Specify where to save the checkpoints from your training:

In [None]:
from transformers import TrainingArguments

# Minimal configuration: only the checkpoint output directory is set here.
training_args = TrainingArguments(output_dir="test_trainer")

### Evaluate

[Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) does not automatically evaluate model performance during training. You'll need to pass [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) a function to compute and report metrics. The [🤗 Evaluate](https://huggingface.co/docs/evaluate/index) library provides a simple [`accuracy`](https://huggingface.co/spaces/evaluate-metric/accuracy) function you can load with the [evaluate.load](https://huggingface.co/docs/evaluate/main/en/package_reference/loading_methods#evaluate.load) (see this [quicktour](https://huggingface.co/docs/evaluate/a_quick_tour) for more information) function:

In [None]:
import numpy as np
import evaluate

# Load the accuracy metric through the 🤗 Evaluate library.
metric = evaluate.load('accuracy')

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Call `compute` on `metric` to calculate the accuracy of your predictions. Before passing your predictions to `compute`, you need to convert the logits to predictions (remember all 🤗 Transformers models return logits):

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass and log a confusion matrix to W&B.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        The result of ``metric.compute`` for the arg-max predictions.
    """
    # Imported here so the function does not depend on a later cell having
    # already imported wandb.
    import wandb

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # `wandb.log` raises when no run is active (for example when evaluating
    # before training has started a run), so only log when a run exists.
    if wandb.run is not None:
        wandb.log({"conf_mat": wandb.plot.confusion_matrix(probs=None,
                                                           y_true=labels, preds=predictions,
                                                           class_names=list(id2label.values()))})

    return metric.compute(predictions=predictions, references=labels)

If you'd like to monitor your evaluation metrics during fine-tuning, specify the `evaluation_strategy` parameter in your training arguments to report the evaluation metric at the end of each epoch:

In [None]:
import os

import wandb

# Never commit an API key to a notebook: anyone who can read the file can use it.
# The key is read from the WANDB_API_KEY environment variable instead; when it
# is unset, `key=None` lets wandb fall back to its own credential lookup or
# interactive prompt.
# NOTE(review): the key that was previously hardcoded here has been exposed and
# should be revoked in the W&B account settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
from transformers import TrainingArguments, Trainer
MODEL_NAME = "nilc-binary-balanced"

training_args = TrainingArguments(
    # The output directory and the run name share the model name.
    output_dir=MODEL_NAME,
    overwrite_output_dir=True,
    run_name=MODEL_NAME,
    # Training length and batch sizing.
    num_train_epochs=5,
    auto_find_batch_size=True,
    # Evaluate and checkpoint once per epoch, then reload the best checkpoint.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Reporting and publishing.
    report_to="wandb",
    push_to_hub=True,
)

### Trainer

Create a [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) object with your model, training arguments, training and test datasets, and evaluation function:

In [None]:
# Bundle the model, the training arguments, both dataset splits and the metric
# function into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

Cloning https://huggingface.co/tiagoblima/nilc-binary-balanced into local empty directory.


Then fine-tune your model by calling [train()](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.train):

In [None]:
# Run fine-tuning; the returned TrainOutput is displayed as the cell result.
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: __index_level_0__, text, reference. If __index_level_0__, text, reference are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 1081
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 680
  Number of trainable parameters = 108924674
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[34m[1mwandb[0m: Currently logged in as: [33mtblima[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.174417,0.958678
2,No log,0.281274,0.917355
3,No log,0.249042,0.950413
4,0.200300,0.254496,0.950413
5,0.200300,0.226343,0.950413


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: __index_level_0__, text, reference. If __index_level_0__, text, reference are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 121
  Batch size = 8
Saving model checkpoint to nilc-binary-balanced/checkpoint-136
Configuration saved in nilc-binary-balanced/checkpoint-136/config.json
Model weights saved in nilc-binary-balanced/checkpoint-136/pytorch_model.bin
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: __index_level_0__, text, reference. If __index_level_0__, text, reference are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 121
  Batch size = 8
Saving model ch

TrainOutput(global_step=680, training_loss=0.1507146814290215, metrics={'train_runtime': 567.4847, 'train_samples_per_second': 9.524, 'train_steps_per_second': 1.198, 'total_flos': 1422115194535936.0, 'train_loss': 0.1507146814290215, 'epoch': 5.0})

In [None]:
# Load a second dataset from the Hub; it is used further down to evaluate the fine-tuned model.
mec_dataset = load_dataset('tiagoblima/mec-punctuation-explainable')



  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
from datasets import concatenate_datasets
def binarize(batch):
    """Collapse the label of one example to two classes, in place.

    ``"pontuação correta"`` becomes ``"correct"``; every other label becomes
    ``"incorrect"``. The same (mutated) object is returned.
    """
    if batch["label"] == "pontuação correta":
        batch["label"] = "correct"
    else:
        batch["label"] = "incorrect"
    return batch

#dataset = dataset.map(binarize)
#dataset['train'].to_pandas()


# Binarize the raw labels first; `new_mec` keeps the string labels.
new_mec = mec_dataset['train'].map(binarize)

# Tokenize in batches, then replace the string labels with integer ids.
mec_datasets = (
    new_mec
    .map(tokenize_function, batched=True)
    .map(map_label)
)



  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/1327 [00:00<?, ?ex/s]

In [None]:

# Balance the two classes by down-sampling both to the size of the smaller one.
# The sample size is derived from the data instead of being hardcoded, so the
# cell keeps working if the dataset changes (selecting more rows than a class
# contains would raise an error).
label_0_all = mec_datasets.filter(lambda item: item['label'] == 0)
label_1_all = mec_datasets.filter(lambda item: item['label'] == 1)
n_per_class = min(len(label_0_all), len(label_1_all))

label_0 = label_0_all.shuffle(seed=42).select(range(n_per_class))
label_1 = label_1_all.shuffle(seed=42).select(range(n_per_class))

mec_datasets = concatenate_datasets([label_0, label_1])

mec_datasets

In [None]:
# Count how many examples carry each label id.
mec_datasets.to_pandas().loc[:, ['label']].value_counts()

label
0        886
1        441
dtype: int64

In [None]:
# Distinct string labels left after binarization. (A preceding bare
# `new_mec['label'][:5]` expression was dropped: only a cell's last expression
# is displayed, so its value was computed and then discarded.)
set(new_mec['label'])

{'correct', 'incorrect'}

In [None]:
# Evaluate the fine-tuned model on the second dataset rather than on the held-out split.
result = trainer.evaluate(mec_datasets)
result

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, annotation_span, annot_id, text_id. If text, annotation_span, annot_id, text_id are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1327
  Batch size = 8


{'eval_loss': 1.6018218994140625,
 'eval_accuracy': 0.5516201959306707,
 'eval_runtime': 41.4611,
 'eval_samples_per_second': 32.006,
 'eval_steps_per_second': 4.004,
 'epoch': 5.0}

In [None]:
import pandas as pd 

# Render the metrics dict as CSV text (no path is given, so the text is returned rather than written to a file).
pd.DataFrame.from_dict(result, orient='index').to_csv()

',0\neval_loss,1.6018218994140625\neval_accuracy,0.5516201959306707\neval_runtime,41.4611\neval_samples_per_second,32.006\neval_steps_per_second,4.004\nepoch,5.0\n'

In [None]:
import torch
from transformers import pipeline

# Use the first GPU when one is available and fall back to CPU (-1) otherwise,
# rather than hardcoding device 0, which fails on a machine without CUDA.
device = 0 if torch.cuda.is_available() else -1
pipe = pipeline("text-classification", model=trainer.model, tokenizer=tokenizer, device=device)

In [None]:
# Classify a single sentence with the fine-tuned model.
pipe('Eu quero ir para casa mas tá chovendo')

[{'label': 'correct', 'score': 0.9886323809623718}]

In [None]:
from collections import defaultdict

# Collect at most five examples per label name.
selections = defaultdict(list)
for example in mec_datasets:
    bucket = selections[id2label[example['label']]]
    if len(bucket) < 5:
        bucket.append(example)


In [None]:
# Fetch row 20 and read its text, rather than loading the whole `text` column
# just to pick one element.
text = eval_dataset[20]['text']
text

'percebe se portanto, que os pacientes que fazem uso desses produtos devem ser acompanhados por médicos e realizar a administração de maneira criteriosa.'

In [None]:
from collections import Counter 

# Class balance of the training split, keyed by label id.
Counter(train_dataset['label'])

Counter({1: 544, 0: 537})

In [None]:
# Probe sentence with a comma placed directly after the subject.
pipe('Eu, fui chamado para a festa mas tô sem grana')



[{'label': 'incorrect', 'score': 0.9857272505760193}]

In [None]:

# Probe sentence with the connective "no entanto" set off by commas.
pipe('Eu fui chamado para a festa, no entanto, tô sem grana')

[{'label': 'correct', 'score': 0.9874381422996521}]

In [None]:
# Print the model's prediction next to the gold label for every collected example.
for label, examples_for_label in selections.items():
    for example in examples_for_label:
        # The pipeline returns a list holding one {'label', 'score'} dict per input.
        prediction = pipe(example['text'])[0]

        # Read the fields by key rather than unpacking `.values()`, which
        # silently depends on the order of the dict's keys.
        predicted = "Predicted Label: {} Score: {}".format(prediction['label'], prediction['score'])
        print(example['text'])
        true_label = id2label[example['label']]
        print(f'{predicted} --> {true_label}')
        print()
    print('-'*30)


aconteceu uma coisa estava chovendo umas chuvas de granizo do nada aconteceu caiu um diamante brilhante na arma da minha casa é depois peguei o diamante levei pro meu quarto é mostrei por meus pais então eles pensaram vender o dia mente na loja de jogas pelos menos iria fazer um bom preço para agente.
Predicted Label: correct Score: 0.88274747133255 --> incorrect

um dia depois aconteceu e choveu bastantes e do nada apareceu outro diamante brilhante e eu pensei fico com o diamante pra mim e fico fazendo coleções de diamante pra eu ter minha propria loja de diamante brilhante.
Predicted Label: incorrect Score: 0.6746932864189148 --> incorrect

nesse dia eu encontrei um diamante ele e muito bonito e na o e pouco não ele e muito lindo e tambem bem mais caro, mesmo bem caro mesmo homem na o nao quis vender ele por recordação e tambem seria bom esperar ele valorizar por que ele mais velho mais caro um exemplo uma pedra preciosa quanto mais velho, mais caro
Predicted Label: incorrect Score: 

<a id='pytorch_native'></a>

## Explainable AI

In [None]:
!pip install transformers_interpret

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import AutoModelForTokenClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer
from tokenizers.processors import TemplateProcessing

#model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
#tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reload the classifier from the local `MODEL_NAME` directory.
# NOTE(review): the reloaded `model` is not what gets explained below — the
# explainer is built from `trainer.model`. Confirm which of the two is intended.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(label2id), id2label=id2label,
    label2id=label2id)
# Despite the `ner_` prefix in its name, this is a sequence-classification explainer.
ner_explainer = SequenceClassificationExplainer(
    trainer.model,
    tokenizer,
)

loading configuration file nilc-binary-balanced/config.json
Model config BertConfig {
  "_name_or_path": "nilc-binary-balanced",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "correct",
    "1": "incorrect"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "correct": 0,
    "incorrect": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "fl

In [None]:
# Sentence whose prediction will be explained.
sample_text = "Eu gostaria de sair no entanto não tenho dinheiro."
# — the comma indicates the ellipsis of the word "vezes".
#'Todos gostamos de arroz e feijão alimentos indispensáveis na mesa do brasileiro'


# Calling the explainer yields one (token, attribution score) pair per token,
# including the [CLS] and [SEP] markers.
word_attributions = ner_explainer(sample_text)
word_attributions

[('[CLS]', 0.0),
 ('Eu', 0.07332024460325907),
 ('gostaria', 0.20936367524894348),
 ('de', 0.18853982837731512),
 ('sair', 0.32657818211149103),
 ('no', 0.22047551610883911),
 ('entanto', 0.3278901172756012),
 ('não', 0.5450452875044555),
 ('tenho', 0.4940970514038384),
 ('dinheiro', 0.29592704794559155),
 ('.', -0.1539154175147469),
 ('[SEP]', 0.0)]

In [None]:
# Render the attributions of the last explained sentence, passing the HTML file
# name as a plain string: the original f-prefix had no placeholders to interpolate.
ner_explainer.visualize("bert_ner_viz_.html")

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,correct (0.98),correct,2.53,[CLS] Eu gostaria de sair no entanto não tenho dinheiro . [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,correct (0.98),correct,2.53,[CLS] Eu gostaria de sair no entanto não tenho dinheiro . [SEP]
,,,,
