# Fine-tuning a model with the Trainer API or Keras

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [1]:
!pip install datasets evaluate transformers[sentencepiece]



In [2]:
!pip install --upgrade --quiet datasets

In [3]:
"""
将MRPC数据集分词，然后按批次来组织
"""
from typing import Any, Dict

from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# 1. 加载数据集
raw_datasets = load_dataset("glue", "mrpc")

# 2. 对数据集进行分词
ckpt = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
def tokenize_function(sample: Dict[str, Any]):
    return tokenizer(sample["sentence1"], sample["sentence2"], truncation=True)
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# 3. 实例化批次构造器
data_collator = DataCollatorWithPadding(tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

In [4]:
""" 实例化Trainer """
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

# 1. 首先，实例化模型
model = AutoModelForSequenceClassification.from_pretrained(ckpt, num_labels=2)

# 2. 定义超参数
training_args = TrainingArguments("test-trainer")

# 3. 提取训练集以及验证集
train_dataset, eval_dataset = tokenized_datasets["train"], tokenized_datasets["validation"]

# 4. 实例化Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer  # processing_class可以认为是特征提取器或者数据预处理器
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
""" 象征性地训练一下 """
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mf499d5[0m ([33m0xf499d5[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
500,0.5189
1000,0.3221


TrainOutput(global_step=1377, training_loss=0.3573638705473147, metrics={'train_runtime': 246.884, 'train_samples_per_second': 44.572, 'train_steps_per_second': 5.578, 'total_flos': 405114969714960.0, 'train_loss': 0.3573638705473147, 'epoch': 3.0})

In [13]:
""" 浏览预测结果 """
predictions = trainer.predict(eval_dataset)
# predictions有三个主要的属性：predictions, label_ids, metrics
print(f"predictions: \n{predictions.predictions[:10]}\n")
print(f"label_ids: \n{predictions.label_ids[:10]}\n")
print(f"metrics: \n{predictions.metrics}\n")

import numpy as np
preds = np.argmax(predictions.predictions, axis=-1)
print(f"the predicted labels: \n{preds[:10]}")

predictions: 
[[-4.5475845  4.411658 ]
 [ 3.9147272 -3.1745813]
 [ 2.697263  -1.7748233]
 [-4.3770647  4.2906036]
 [ 3.9060833 -3.1893773]
 [-4.612096   4.4749036]
 [-4.092276   3.84595  ]
 [-4.596219   4.4423995]
 [-4.5641766  4.436462 ]
 [-4.5657845  4.5142164]]

label_ids: 
[1 0 0 1 0 1 0 1 1 1]

metrics: 
{'test_loss': 0.942421019077301, 'test_accuracy': 0.8553921568627451, 'test_f1': 0.8991452991452992, 'test_runtime': 2.913, 'test_samples_per_second': 140.06, 'test_steps_per_second': 17.507}

the predicted labels: 
[1 0 0 1 0 1 1 1 1 1]


In [7]:
""" 在训练期间通过监控验证指标来评估训练情况 """
# 1. 在超参数里面设置每个epoch结束后通过计算评估指标来评估模型性能
train_args = TrainingArguments(output_dir="test-trainer", eval_strategy="epoch")

# 2. 定义用于计算评估指标的函数
import evaluate
def compute_metric(eval_preds):
    metric = evaluate.load("glue", "mrpc")  # 加载MRPC数据集对应的评估指标
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# 3. 在训练的时候监控评估指标
trainer = Trainer(
    model=model,
    args=train_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
    compute_metrics=compute_metric
)

# 3. 执行训练/微调
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.724441,0.813725,0.858736
2,0.175800,0.855507,0.857843,0.898601
3,0.075700,0.942421,0.855392,0.899145


TrainOutput(global_step=1377, training_loss=0.10556910534916188, metrics={'train_runtime': 245.4444, 'train_samples_per_second': 44.833, 'train_steps_per_second': 5.61, 'total_flos': 405114969714960.0, 'train_loss': 0.10556910534916188, 'epoch': 3.0})

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/0xf499d5/huggingface_llm_course/blob/main/chapter1/section3-what_can_transformers_do.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>