In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Load the "mrpc" configuration of the "glue" dataset; the "train" and
# "validation" splits are the ones used by the Trainer cells below.
raw_datasets = load_dataset("glue", "mrpc")
# Checkpoint name shared by the tokenizer here and the model in later cells.
checkpoint = "bert-base-uncased"
# Tokenizer that belongs to the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the two sentences of an example (or batch) as a pair.

    Args:
        example: Mapping with "sentence1" and "sentence2" entries. With
            ``Dataset.map(..., batched=True)`` each entry holds the values
            of a whole batch.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the pair, with
        truncation enabled. No padding is requested here.
    """
    first_sentences = example["sentence1"]
    second_sentences = example["sentence2"]
    return tokenizer(first_sentences, second_sentences, truncation=True)


# Apply tokenize_function to the dataset; batched=True passes several examples
# per call instead of one at a time.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
# Padding is left to the collator (tokenize_function only truncates), so it is
# applied when batches are assembled.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 3668/3668 [00:00<00:00, 28429.18 examples/s]


In [2]:
# First step: define a TrainingArguments object that will contain all the
# hyperparameters the Trainer will use for training and evaluation.
from transformers import TrainingArguments

# The only argument you have to provide is the directory where the trained
# model will be saved, as well as the checkpoints along the way. For all the
# rest, you can leave the defaults, which should work pretty well for a basic
# fine-tuning.
training_args = TrainingArguments(output_dir="test-trainer")

In [3]:
# Second step: define our model, using the AutoModelForSequenceClassification
# class with two labels.
from transformers import AutoModelForSequenceClassification

# Same checkpoint as the tokenizer; num_labels=2 requests a two-class
# classification head, whose weights are newly initialized (see the warning
# printed below this cell).
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


You get a warning after instantiating this pretrained model. This is because BERT has not been pretrained on classifying pairs of sentences, so the head of the pretrained model has been discarded and a new head suitable for sequence classification has been added instead. The warnings indicate that some weights were not used (the ones corresponding to the dropped pretraining head) and that some others were randomly initialized (the ones for the new head). The message concludes by encouraging you to train the model, which is exactly what we are going to do now.

In [4]:
#third step: define a trainer by passing it all the objects constructed up to now — 
#the model, the training_args, the training and validation datasets, our data_collator, 
#and our tokenizer:
from transformers import Trainer

# Every object is handed over by keyword so the role of each one is explicit.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)
# Note: when the tokenizer is passed as it is here, the default data_collator
# used by the Trainer is already a DataCollatorWithPadding like the one
# defined previously, so the data_collator=data_collator argument could be
# left out of this call.

In [5]:
# Fine-tune the model on our dataset. This starts the fine-tuning and reports
# the training loss every 500 steps. The run recorded below stopped at
# global_step=459, which is presumably why the loss table under this cell has
# no rows.
trainer.train()



Step,Training Loss


TrainOutput(global_step=459, training_loss=0.3126908512157033, metrics={'train_runtime': 83.2534, 'train_samples_per_second': 132.175, 'train_steps_per_second': 5.513, 'total_flos': 441241350948960.0, 'train_loss': 0.3126908512157033, 'epoch': 3.0})

In [6]:
# Fourth step: evaluation
import evaluate
import numpy as np

# trainer.predict returns the model's raw logits in `.predictions` and the
# reference labels in `.label_ids`.
predictions = trainer.predict(tokenized_datasets["validation"])
# The predicted class is the index of the largest logit. No softmax is needed
# for that; a softmax would only be required to turn logits into probabilities.
preds = np.argmax(predictions.predictions, axis=-1)
# Load the metric for the same dataset/config pair and compare the predicted
# classes with the reference labels. The result is the cell's displayed output.
metric = evaluate.load("glue", "mrpc")
metric.compute(predictions=preds, references=predictions.label_ids)

{'accuracy': 0.8406862745098039, 'f1': 0.8881239242685026}

In [7]:
#Wrapping everything together
def compute_metrics(eval_preds):
    """Score one evaluation pass with the GLUE/MRPC metric.

    Args:
        eval_preds: Pair that unpacks into ``(logits, labels)``, as passed by
            the Trainer to its ``compute_metrics`` callback.

    Returns:
        The result of the metric's ``compute`` call on the argmax of the
        logits (last axis) against the labels.
    """
    glue_mrpc = evaluate.load("glue", "mrpc")
    raw_scores, gold_labels = eval_preds
    # The predicted class is the position of the highest score on the last axis.
    predicted_classes = np.argmax(raw_scores, axis=-1)
    return glue_mrpc.compute(predictions=predicted_classes, references=gold_labels)

# New arguments and a freshly loaded model, so this run starts again from the
# pretrained checkpoint instead of continuing the earlier fine-tuning.
# evaluation_strategy="epoch" asks for an evaluation at the end of each epoch.
# NOTE(review): newer transformers releases name this option `eval_strategy`;
# confirm against the installed version before upgrading.
training_args = TrainingArguments(
    output_dir="test-trainer",
    evaluation_strategy="epoch",
)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

# Same setup as the first Trainer, plus compute_metrics so every evaluation
# reports the metric values next to the validation loss.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.346978,0.872549,0.905797
2,No log,0.339805,0.870098,0.906195
3,No log,0.488169,0.862745,0.904762




TrainOutput(global_step=459, training_loss=0.31819109615417346, metrics={'train_runtime': 88.986, 'train_samples_per_second': 123.66, 'train_steps_per_second': 5.158, 'total_flos': 441241350948960.0, 'train_loss': 0.31819109615417346, 'epoch': 3.0})