In [17]:
"""
BERTの勉強 note2
"""
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from transformers import AutoModel, AutoTokenizer
from transformers import BertForSequenceClassification
from transformers import TrainingArguments, Trainer

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import glob, pickle

# Checkpoint identifier passed to BertForSequenceClassification.from_pretrained() further down.
pretrained_model_name = "cl-tohoku/bert-base-japanese"

In [2]:
# トークナイズ処理はnote1で実施済み、ファイルからロードするものとする

In [2]:
# タスク用Datasetクラスを定義
class LivedoorDataset(Dataset):
    """Map-style dataset pairing per-sample encodings with a per-sample label.

    Tensors are built lazily: nothing is converted until a sample is requested.
    """

    def __init__(self, encodings, labels):
        """Keep references to the encodings mapping and the labels.

        Args:
            encodings: Mapping from feature name to an indexable collection
                holding one entry per sample.
            labels: Indexable collection holding one label per sample.
        """
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of samples, i.e. the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors.

        The dict has one tensor per key of ``encodings`` plus a ``"labels"``
        tensor built from ``labels[idx]``.
        """
        sample = {}
        for name, values in self.encodings.items():
            sample[name] = torch.tensor(values[idx])
        # The key has to be "labels" (plural), not "label".
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

In [3]:
# Load the datasets that were previously saved as pickle files.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only point this at files you produced yourself.
# NOTE(review): presumably the files hold LivedoorDataset instances, in which
# case that class must already be defined when this cell runs - confirm.
DATASET_DIR = "../../DataSet/ldcc/dataloader"


def load_pickled_dataset(split):
    """Load one pickled dataset split from DATASET_DIR.

    Args:
        split: Split name; the file read is ``ds_<split>.pkl``.

    Returns:
        The object that was pickled into that file.
    """
    with open(f"{DATASET_DIR}/ds_{split}.pkl", "rb") as f:
        return pickle.load(f)


ds_train = load_pickled_dataset("train")
ds_valid = load_pickled_dataset("valid")
ds_test = load_pickled_dataset("test")

In [5]:
# Peek at the first item of the test dataset to check what a single sample looks like.
next(iter(ds_test))

{'input_ids': tensor([    2, 18060,    11,  3579,  2713,     7,  2110,    16,    33,   140,
            53,     9, 20456,     7,   707,     5,    12,     9,    80,  3635,
           205,    29,     8,  4799,    53,     9,  1281, 28516,     5,    45,
         18060,    11,  3579,     7,    15,    16,  1497,  3876,    11,  1720,
            16,   546,    16,     9,  1704,  3635,   205,  2935,  6294, 29491,
         13945, 28697,  3042,  9308,     9,     6, 19130,  6286,    12,  1698,
            34,    45,    28,   203,  1379,     5,  3579,     5,   124,     7,
         18060,    11,   666,    34,    45,    14,   392,     8,   604, 10558,
          6708, 11604,    49,     6,  1040, 18469,   241,     5,  9999,    28,
          2367,    16,    33,     8,  3579,  1197,    14,  6656,    16,    33,
         18060,    11,   221,  3579, 11484,   666,     8,  3876,    50,    28,
         18917, 28457,    82,     6,  5523,  5402,   255,   666,    34,    45,
            14,   392,     8,  3225,   

#### データの準備ここまで
ここからはtransformersを活用\
今回はプリセットモデルBertForSequenceClassificationを使う

In [6]:
# スクラッチしていたときはDataLoaderでバッチ化していたが
# Trainerを使うのでもうやらなくていい

# batch_size_train = 16
# batch_size_val = 64

# bt_train = DataLoader(ds_train, batch_size=batch_size_train)
# bt_val = DataLoader(ds_valid, batch_size=batch_size_val)
# dataloader_dict = {"train": bt_train, "val": bt_val}

In [4]:
"""
ファインチューニング用モデルは読み込むだけ
"""
# Load the pretrained checkpoint into the sequence-classification model class;
# num_labels=9 sets the number of output classes.
alt_model = BertForSequenceClassification.from_pretrained(
    pretrained_model_name,
    num_labels=9,
)

Some weights of the model checkpoint at cl-tohoku/bert-base-japanese were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [8]:
# Display the module structure of the loaded model.
alt_model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(32000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

#### 学習工程はTrainerを使って定義
* TrainingArgumentsでコンフィグ指定
* Trainerインスタンス作成
    - モデルやデータセットはここで渡す
    - 必要に応じて評価時のメトリクス計算関数をセット（accとかprとかf1とか）
* Trainer.train()で学習

In [5]:
# 評価関数の設定
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for evaluation.

    Args:
        pred: Prediction object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores). The argmax over the last axis
            of ``predictions`` is taken as the predicted class.

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # zero_division=0 scores a class that is never predicted as 0 - the same
    # value the default produces - but without emitting an
    # UndefinedMetricWarning on every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

In [6]:
# Define the TrainingArguments and build the Trainer.
training_args = TrainingArguments(
    # Output locations
    output_dir='./preset_outputs/',
    logging_dir='./logs',
    # Schedule / regularisation
    num_train_epochs=1,
    warmup_steps=100,
    weight_decay=0.01,
    # Batch sizes
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    # Log and evaluate every 10 steps.
    # NOTE(review): the run recorded in this notebook has 1474 optimization
    # steps, so this means roughly 147 evaluation passes per epoch; consider a
    # larger eval_steps if training time matters.
    logging_steps=10,
    evaluation_strategy='steps',
    eval_steps=10,
    # Do not disable CUDA
    no_cuda=False,
)

# If this cell is re-run, drop the previously created Trainer before building a new one.
try:
    del trainer
except NameError:
    pass

trainer = Trainer(
    compute_metrics=compute_metrics,
    eval_dataset=ds_valid,
    train_dataset=ds_train,
    args=training_args,
    model=alt_model,
)

In [7]:
# Fine-tuning: run training with the Trainer configured above.
trainer.train()

***** Running training *****
  Num examples = 5893
  Num Epochs = 1
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 1474


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.2549,2.282483,0.063772,0.04266,0.030775,0.079843
20,2.2306,2.223669,0.130258,0.079591,0.084067,0.149683
30,2.148,2.106574,0.29308,0.185446,0.189665,0.257002
40,2.0974,2.025405,0.265943,0.145828,0.243751,0.233014
50,1.9151,1.863482,0.453189,0.340263,0.328339,0.407672
60,1.9348,1.738338,0.50882,0.445642,0.644198,0.468371
70,1.6741,1.553525,0.544098,0.496598,0.599242,0.515401
80,1.5392,1.295636,0.686567,0.614431,0.763448,0.642008
90,1.1875,1.079493,0.724559,0.640673,0.673621,0.680546
100,1.2869,1.004042,0.68521,0.593198,0.651753,0.637093


***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch siz

  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size = 8
***** Running Evaluation *****
  Num examples = 737
  Batch size 

TrainOutput(global_step=1474, training_loss=0.5198186491003192, metrics={'train_runtime': 3524.1503, 'train_samples_per_second': 1.672, 'train_steps_per_second': 0.418, 'total_flos': 1550610899278848.0, 'train_loss': 0.5198186491003192, 'epoch': 1.0})

In [9]:
# Evaluate performance on the validation data.
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 737
  Batch size = 8


{'eval_loss': 0.2705237567424774,
 'eval_accuracy': 0.9308005427408412,
 'eval_f1': 0.9205819442899481,
 'eval_precision': 0.9203719758246949,
 'eval_recall': 0.9225340138481999,
 'eval_runtime': 19.4609,
 'eval_samples_per_second': 37.871,
 'eval_steps_per_second': 4.779,
 'epoch': 1.0}

In [11]:
# Evaluate the fine-tuned model on the test data.
trainer.evaluate(ds_test)

***** Running Evaluation *****
  Num examples = 737
  Batch size = 8


{'eval_loss': 0.34525835514068604,
 'eval_accuracy': 0.9240162822252375,
 'eval_f1': 0.9176564379716505,
 'eval_precision': 0.9213396751918022,
 'eval_recall': 0.9173713737708721,
 'eval_runtime': 19.7066,
 'eval_samples_per_second': 37.399,
 'eval_steps_per_second': 4.719,
 'epoch': 1.0}

In [12]:
# Save the model (configuration and weights) under ./preset_outputs/fine-tuned.
alt_model.save_pretrained("./preset_outputs/fine-tuned")


Configuration saved in ./preset_outputs/fine-tuned\config.json
Model weights saved in ./preset_outputs/fine-tuned\pytorch_model.bin


In [13]:
# Load the saved fine-tuned model back from disk and wrap it in a Trainer to try it out.
fine_tuned_model_path = "./preset_outputs/fine-tuned/"
fine_tuned_model = BertForSequenceClassification.from_pretrained(
    fine_tuned_model_path,
    num_labels=9,
)
fine_tuned_trainer = Trainer(model=fine_tuned_model)

loading configuration file ./preset_outputs/fine-tuned/config.json
Model config BertConfig {
  "_name_or_path": "cl-tohoku/bert-base-japanese",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "LABEL_7": 7,
    "LABEL_8": 8
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "pro

In [18]:
# Run inference on the test dataset.
# Read the raw model outputs through the named `predictions` field rather than
# unpacking into `_`: in IPython/Jupyter, assigning to `_` rebinds the
# "last output" variable in the notebook namespace.
raw_preds = fine_tuned_trainer.predict(ds_test).predictions
# Predicted class id = index of the highest score for each sample.
preds = np.argmax(raw_preds, axis=1)

***** Running Prediction *****
  Num examples = 737
  Batch size = 8


In [33]:
# Extract just the labels: ground truth from each test sample, predictions via argmax.
y_truth = np.array([int(ds_test[i]["labels"]) for i in range(len(ds_test))])
y_pred = raw_preds.argmax(axis=1)

In [42]:
# Display the predicted class ids.
y_pred


array([2, 4, 0, 1, 3, 6, 3, 1, 0, 8, 3, 6, 5, 4, 1, 0, 6, 6, 5, 4, 4, 7,
       5, 1, 8, 0, 1, 7, 3, 0, 1, 6, 6, 5, 7, 0, 1, 6, 8, 6, 7, 6, 8, 6,
       3, 6, 5, 7, 0, 2, 4, 7, 6, 1, 1, 3, 4, 8, 3, 2, 8, 1, 7, 2, 0, 6,
       0, 1, 5, 1, 7, 0, 5, 7, 7, 7, 1, 8, 7, 5, 7, 2, 7, 4, 2, 2, 5, 6,
       3, 8, 4, 4, 7, 4, 2, 0, 3, 4, 4, 5, 6, 8, 3, 8, 2, 8, 8, 1, 0, 8,
       4, 6, 5, 7, 1, 6, 1, 8, 1, 5, 5, 3, 4, 6, 5, 0, 1, 0, 3, 3, 7, 8,
       6, 3, 4, 1, 8, 7, 7, 8, 3, 5, 1, 2, 2, 4, 4, 0, 5, 1, 2, 0, 8, 4,
       6, 8, 6, 8, 7, 8, 2, 4, 6, 1, 8, 5, 4, 4, 4, 1, 4, 8, 3, 7, 5, 1,
       5, 2, 6, 8, 5, 3, 4, 7, 6, 1, 1, 7, 5, 6, 5, 8, 4, 8, 0, 4, 3, 6,
       5, 6, 0, 2, 1, 3, 4, 4, 1, 5, 1, 7, 8, 5, 5, 7, 2, 0, 7, 5, 0, 5,
       8, 4, 2, 6, 6, 6, 4, 3, 7, 5, 6, 5, 4, 0, 4, 8, 7, 2, 4, 1, 3, 0,
       0, 1, 3, 1, 7, 5, 5, 6, 7, 8, 8, 7, 2, 0, 3, 5, 1, 5, 1, 4, 1, 5,
       1, 7, 4, 7, 7, 8, 5, 8, 6, 5, 1, 4, 6, 0, 2, 7, 7, 2, 6, 3, 2, 6,
       3, 1, 1, 4, 0, 6, 1, 6, 4, 6, 6, 1, 1, 2, 6,

In [49]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# Score the predictions against the ground-truth labels.
# For multi-class classification an averaging mode has to be chosen from
# [None, 'micro', 'macro', 'weighted']; macro averaging is used here.
accuracy = accuracy_score(y_truth, y_pred)
precision = precision_score(y_truth, y_pred, average='macro')
recall = recall_score(y_truth, y_pred, average='macro')
f1 = f1_score(y_truth, y_pred, average='macro')

print(f"Accuracy: {accuracy:.4f}| Recall: {recall:.4f}| Precision: {precision:.4f}|F1: {f1:.4f}")

Accuracy: 0.9240| Recall: 0.9174| Precision: 0.9213|F1: 0.9177
