In [1]:
import re
import torch
import torch.nn as nn
from transformers import BertForTokenClassification, BertTokenizer
from transformers import AdamW
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm, trange

In [2]:
# Path of the annotated corpus; it is opened as UTF-8 text and parsed line by
# line for inline "[@...#...*]" entity annotations.
file = "biaoji.txt"

In [3]:
# Parse the annotated corpus into one list of [character, tag] pairs per line.
# Entities are marked inline as "[@<entity>#<type>*]". Characters outside an
# annotation are tagged 'O'; entity characters are tagged B-/I-/E-<type>, and a
# single-character entity is tagged S-<type>.
# NOTE(review): the tag_to_ix mapping built later in this notebook has no "S-"
# entry, so a single-character entity would raise KeyError there — confirm the
# corpus never contains one, or extend the mapping.
annotation_pattern = re.compile(r'\[\@(.*?)\*\]')


def tag_entity(entity, ner):
    """Return the [char, tag] pairs of one annotated entity.

    entity: the annotated text span.
    ner: the entity type used as the tag suffix.
    """
    if not entity:
        # An empty annotation contributes no characters.
        return []
    if len(entity) == 1:
        return [[entity, 'S-' + ner]]
    pairs = [[ch, 'I-' + ner] for ch in entity]
    pairs[0][1] = 'B-' + ner
    pairs[-1][1] = 'E-' + ner
    return pairs


all_ner_data = []
# The with-block closes the file; no explicit close() is needed afterwards.
with open(file, encoding="utf-8") as f:
    for line_no, s in enumerate(f, start=1):
        s = s.strip('\n')
        matches = list(annotation_pattern.finditer(s))
        # Every opening and closing marker must belong to exactly one match;
        # otherwise the line is malformed (unbalanced or nested markers).
        if s.count('[@') != len(matches) or s.count('*]') != len(matches):
            raise ValueError(f"line {line_no}: unbalanced annotation markers: {s!r}")

        ner_data = []
        i = 0  # index of the first character not yet emitted
        for match in matches:
            # Plain text between the previous annotation and this one.
            ner_data.extend([ch, 'O'] for ch in s[i:match.start()])
            # Split on the LAST '#' only, so the entity text may contain '#'.
            parts = match.group(1).rsplit('#', 1)
            if len(parts) != 2:
                raise ValueError(f"line {line_no}: annotation without '#': {match.group(0)!r}")
            entity, ner = parts
            ner_data.extend(tag_entity(entity, ner))
            i = match.end()
        # Plain text after the last annotation.
        ner_data.extend([ch, 'O'] for ch in s[i:])
        all_ner_data.append(ner_data)

In [4]:
# Convert each sequence of [char, tag] pairs into a (chars, tags) tuple.
# The tuple is built once per sequence, outside any inner loop, so an empty
# sequence yields ([], []) instead of re-using the previous sequence's tuple.
all_ner_data_list = []
for seq_list in all_ner_data:
    zi = [zi_mark[0] for zi_mark in seq_list]    # characters
    mark = [zi_mark[1] for zi_mark in seq_list]  # tags
    all_ner_data_list.append((zi, mark))

In [5]:
all_ner_data_list[0]

(['上', '海', '今', '天', '的', '天', '气', '怎', '么', '样'],
 ['B-Location', 'E-Location', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'])

In [6]:
all_ner_data_list[1]

(['杭', '州', '今', '天', '有', '点', '下', '雨'],
 ['B-Location', 'E-Location', 'O', 'O', 'O', 'O', 'O', 'O'])

In [7]:
all_ner_data_list[2]

(['昨', '天', '成', '都', '出', '太', '阳', '了'],
 ['O', 'O', 'B-Location', 'E-Location', 'O', 'O', 'O', 'O'])

### 输入处理

由于BERT模型的特殊性，需要再处理一下输入:

- input_ids(padding)
- attention_masks
- labels

In [8]:
from keras.preprocessing.sequence import pad_sequences  # padding

Using TensorFlow backend.


In [9]:
# Report the available hardware and choose the device used for tensors.
print("Is CUDA available: ", torch.cuda.is_available())
n_gpu = torch.cuda.device_count()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("GPU numbers: ", n_gpu)
# get_device_name raises when no CUDA device exists, so only query it on GPU machines.
print("device_name: ", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu")

Is CUDA available:  True
GPU numbers:  2
device_name:  Tesla M40 24GB


In [10]:
# Tag <-> index lookup tables. The index of a tag is its position in the list
# below; ix_to_tag is derived from tag_to_ix so the two cannot drift apart.
tag_to_ix = {
    tag: ix
    for ix, tag in enumerate([
        "B-Location",
        "I-Location",
        "E-Location",
        "O",
        "[CLS]",
        "[SEP]",
        "[PAD]",
    ])
}

ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}

In [11]:
# Rebuild each sentence string from its characters, and map each tag sequence
# to its list of tag indices.
all_sentences = ["".join(chars) for chars, _tags in all_ner_data_list]
all_labels = [[tag_to_ix[tag] for tag in tags] for _chars, tags in all_ner_data_list]

print(all_sentences)
print(all_labels)

['上海今天的天气怎么样', '杭州今天有点下雨', '昨天成都出太阳了', '今年北京的空气不太好', '冬天的哈尔滨冰天雪地', '武汉有很多樱花树', '金华生产的火腿很出名', '上海在地图上紧挨着杭州', '海南岛在冬天里面也很热']
[[0, 2, 3, 3, 3, 3, 3, 3, 3, 3], [0, 2, 3, 3, 3, 3, 3, 3], [3, 3, 0, 2, 3, 3, 3, 3], [3, 3, 0, 2, 3, 3, 3, 3, 3, 3], [3, 3, 3, 0, 1, 2, 3, 3, 3, 3], [0, 2, 3, 3, 3, 3, 3, 3], [0, 2, 3, 3, 3, 3, 3, 3, 3, 3], [0, 2, 3, 3, 3, 3, 3, 3, 3, 0, 2], [0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3]]


In [12]:
# Tokenize every sentence into BERT vocabulary ids, with special tokens added.
# NOTE(review): labels are per character; this assumes the tokenizer emits
# exactly one id per character — TODO confirm for text that is not pure Chinese.
tokenizer = BertTokenizer.from_pretrained('./bert-chinese/', do_lower_case=True)
tokenized_texts = []
for sent in all_sentences:
    tokenized_texts.append(tokenizer.encode(sent, add_special_tokens=True))

In [13]:
tokenized_texts[0]

[101, 677, 3862, 791, 1921, 4638, 1921, 3698, 2582, 720, 3416, 102]

In [14]:
# Fixed sequence length: every id sequence is padded or cut to this length.
MAX_LEN = 32

# Right-pad and right-truncate with the Keras helper.
input_ids = pad_sequences(
    tokenized_texts,
    maxlen=MAX_LEN,
    dtype="long",
    padding="post",
    truncating="post",
)

In [15]:
print(len(input_ids[0]))
print(input_ids[0])

32
[ 101  677 3862  791 1921 4638 1921 3698 2582  720 3416  102    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0]


In [16]:
# Align every label row with its padded input ids: wrap it in [CLS]/[SEP]
# labels, then cut or pad it to exactly MAX_LEN entries.
# Padding positions are filled with the "O" index, as the original cell did;
# the "[PAD]" index in tag_to_ix is not used here.
cls_ix = tag_to_ix["[CLS]"]
sep_ix = tag_to_ix["[SEP]"]
pad_ix = tag_to_ix["O"]

for label in all_labels:
    # Rows are modified in place. A row that already starts with the [CLS]
    # label was processed by an earlier run of this cell, so skip it instead
    # of wrapping it a second time.
    if label and label[0] == cls_ix:
        continue
    label.insert(0, cls_ix)
    label.append(sep_ix)
    # Cut from the end, matching truncating="post" used for the input ids,
    # so over-long rows stay the same length as their inputs.
    del label[MAX_LEN:]
    label.extend([pad_ix] * (MAX_LEN - len(label)))

In [17]:
print(len(all_labels[0]))
print(all_labels[0])

32
[4, 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]


In [18]:
# Attention masks: 1.0 where the token id is greater than 0, 0.0 elsewhere
# (the padding added earlier uses id 0).
attention_masks = [
    [float(token_id > 0) for token_id in seq]
    for seq in input_ids
]

In [19]:
# 第一句话的 attention_masks
print(np.array(attention_masks[0]))
print(len(np.array(attention_masks[0])))

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]
32


### 训练集和验证集分开

In [20]:
# Split ids, labels and masks in one call so all three share the same
# shuffled split (10% held out for validation, fixed seed).
(train_inputs, validation_inputs,
 train_labels, validation_labels,
 train_masks, validation_masks) = train_test_split(
    input_ids,
    all_labels,
    attention_masks,
    test_size=0.1,
    random_state=2019,
)

In [21]:
print(len(train_inputs))
print(len(validation_inputs))

print(train_inputs[0])
print(validation_inputs[0])

8
1
[ 101  677 3862 1762 1765 1745  677 5165 2917 4708 3343 2336  102    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0]
[ 101  791 2399 1266  776 4638 4958 3698  679 1922 1962  102    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0]


In [22]:
# Convert the input-id splits to torch tensors.
train_inputs, validation_inputs = (
    torch.tensor(split) for split in (train_inputs, validation_inputs)
)

In [23]:
# Convert the label splits to torch tensors.
train_labels, validation_labels = (
    torch.tensor(split) for split in (train_labels, validation_labels)
)

In [24]:
# Convert the attention-mask splits to torch tensors.
train_masks, validation_masks = (
    torch.tensor(split) for split in (train_masks, validation_masks)
)

In [25]:
train_inputs

tensor([[ 101,  677, 3862, 1762, 1765, 1745,  677, 5165, 2917, 4708, 3343, 2336,
          102,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [ 101, 1100, 1921, 4638, 1506, 2209, 4012, 1102, 1921, 7434, 1765,  102,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [ 101, 3343, 2336,  791, 1921, 3300, 4157,  678, 7433,  102,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [ 101, 7032, 1290, 4495,  772, 4638, 4125, 5597, 2523, 1139, 1399,  102,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [ 101,  677, 3862,  791, 1921, 4638, 1921, 3698, 2582,  720, 3416,  102,
            0,    0,    0,    0,    0, 

In [26]:
train_labels

tensor([[4, 0, 2, 3, 3, 3, 3, 3, 3, 3, 0, 2, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3],
        [4, 3, 3, 3, 0, 1, 2, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3],
        [4, 0, 2, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3],
        [4, 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3],
        [4, 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3],
        [4, 0, 2, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3],
        [4, 3, 3, 0, 2, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3],
        [4, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3]])

In [27]:
train_masks

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 

### 创建迭代器

In [28]:
# Number of examples per batch for both loaders.
batch_size = 16

# Training set: (ids, mask, labels) triples, drawn in random order.
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(
    train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

# Validation set: same triple layout, read sequentially (not shuffled).
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(
    validation_data,
    batch_size=batch_size,
    sampler=validation_sampler,
)

### BERT的微调

In [29]:
# Load the pretrained Chinese BERT with a token-classification head that has
# one output per entry of tag_to_ix.
model = BertForTokenClassification.from_pretrained("./bert-chinese/", num_labels=len(tag_to_ix))
# Move the model to the device selected earlier. Unlike model.cuda(), this
# also works on CPU-only machines and matches where the batches are sent.
model.to(device)

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

In [30]:
# Parameter groups for fine-tuning: parameters whose name contains one of the
# no_decay substrings get weight decay 0.0, all others get 0.01.
no_decay = ['bias', 'LayerNorm.weight']
param_optimizer = list(model.named_parameters())

_decayed_params, _undecayed_params = [], []
for _name, _param in param_optimizer:
    if any(nd in _name for nd in no_decay):
        _undecayed_params.append(_param)
    else:
        _decayed_params.append(_param)

optimizer_grouped_parameters = [
    {'params': _decayed_params, 'weight_decay': 0.01},
    {'params': _undecayed_params, 'weight_decay': 0.0},
]

In [31]:
# AdamW optimizer over the grouped parameters, learning rate 5e-5.
optimizer = AdamW(params=optimizer_grouped_parameters, lr=5e-5)

In [32]:
# Number of passes over the training data.
epochs = 100
# Container for recorded training losses.
train_loss_set = []

In [33]:
# BERT fine-tuning loop: one pass over train_dataloader per epoch.
# The loop variable is named `epoch` rather than `_`, because `_` is IPython's
# "last output" name and should not be shadowed in a notebook.
for epoch in trange(epochs):
    print(f"当前epoch： {epoch}")
    # Training mode (enables dropout).
    model.train()
    tr_loss = 0  # sum of batch losses in this epoch
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(train_dataloader):
        # Move the batch to the selected device and unpack it.
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass; with labels supplied, the first output is the loss.
        output = model(input_ids=b_input_ids,
                       attention_mask=b_input_mask,
                       labels=b_labels)
        loss = output[0]
        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()
        # Track the loss, both per epoch and in the history list that was
        # created for this purpose but never filled before.
        step_loss = loss.item()
        train_loss_set.append(step_loss)
        tr_loss += step_loss
        nb_tr_examples += b_input_ids.size(0)
        nb_tr_steps += 1

    # Guard against an empty dataloader, which would divide by zero.
    if nb_tr_steps:
        print(f"当前 epoch 的 Train loss: {tr_loss/nb_tr_steps}")

  0%|          | 0/100 [00:00<?, ?it/s]

当前epoch： 0


  2%|▏         | 2/100 [00:00<00:20,  4.75it/s]

当前 epoch 的 Train loss: 1.6812641620635986
当前epoch： 1
当前 epoch 的 Train loss: 1.1144497394561768
当前epoch： 2


  4%|▍         | 4/100 [00:00<00:14,  6.45it/s]

当前 epoch 的 Train loss: 0.8810281157493591
当前epoch： 3
当前 epoch 的 Train loss: 0.6797889471054077
当前epoch： 4


  6%|▌         | 6/100 [00:00<00:11,  7.86it/s]

当前 epoch 的 Train loss: 0.457113653421402
当前epoch： 5
当前 epoch 的 Train loss: 0.3127274215221405
当前epoch： 6
当前 epoch 的 Train loss: 0.2258305698633194
当前epoch： 7


 10%|█         | 10/100 [00:01<00:10,  8.88it/s]

当前 epoch 的 Train loss: 0.15024609863758087
当前epoch： 8
当前 epoch 的 Train loss: 0.10542606562376022
当前epoch： 9
当前 epoch 的 Train loss: 0.0748387798666954
当前epoch： 10


 12%|█▏        | 12/100 [00:01<00:09,  9.22it/s]

当前 epoch 的 Train loss: 0.056700803339481354
当前epoch： 11
当前 epoch 的 Train loss: 0.03915904834866524
当前epoch： 12
当前 epoch 的 Train loss: 0.033539656549692154
当前epoch： 13


 16%|█▌        | 16/100 [00:01<00:08,  9.68it/s]

当前 epoch 的 Train loss: 0.021614719182252884
当前epoch： 14
当前 epoch 的 Train loss: 0.01811095140874386
当前epoch： 15
当前 epoch 的 Train loss: 0.013908264227211475
当前epoch： 16


 19%|█▉        | 19/100 [00:02<00:08,  9.88it/s]

当前 epoch 的 Train loss: 0.012244208715856075
当前epoch： 17
当前 epoch 的 Train loss: 0.008958998136222363
当前epoch： 18
当前 epoch 的 Train loss: 0.007758254650980234
当前epoch： 19


 22%|██▏       | 22/100 [00:02<00:07,  9.83it/s]

当前 epoch 的 Train loss: 0.007714369799941778
当前epoch： 20
当前 epoch 的 Train loss: 0.005681825801730156
当前epoch： 21
当前 epoch 的 Train loss: 0.004784988239407539
当前epoch： 22


 24%|██▍       | 24/100 [00:02<00:07,  9.93it/s]

当前 epoch 的 Train loss: 0.00454092537984252
当前epoch： 23
当前 epoch 的 Train loss: 0.00438546622171998
当前epoch： 24
当前 epoch 的 Train loss: 0.003685894189402461
当前epoch： 25


 28%|██▊       | 28/100 [00:02<00:07, 10.03it/s]

当前 epoch 的 Train loss: 0.0034692392218858004
当前epoch： 26
当前 epoch 的 Train loss: 0.0033438827376812696
当前epoch： 27
当前 epoch 的 Train loss: 0.0031216610223054886
当前epoch： 28


 30%|███       | 30/100 [00:03<00:06, 10.06it/s]

当前 epoch 的 Train loss: 0.002635795157402754
当前epoch： 29
当前 epoch 的 Train loss: 0.0027197340968996286
当前epoch： 30
当前 epoch 的 Train loss: 0.002400750759989023
当前epoch： 31


 32%|███▏      | 32/100 [00:03<00:06, 10.08it/s]

当前 epoch 的 Train loss: 0.0021522357128560543
当前epoch： 32
当前 epoch 的 Train loss: 0.0023155626840889454
当前epoch： 33


 36%|███▌      | 36/100 [00:03<00:06, 10.01it/s]

当前 epoch 的 Train loss: 0.002158252988010645
当前epoch： 34
当前 epoch 的 Train loss: 0.0019987872801721096
当前epoch： 35
当前 epoch 的 Train loss: 0.0019327194895595312
当前epoch： 36


 38%|███▊      | 38/100 [00:03<00:06, 10.02it/s]

当前 epoch 的 Train loss: 0.0018406277522444725
当前epoch： 37
当前 epoch 的 Train loss: 0.00161205162294209
当前epoch： 38
当前 epoch 的 Train loss: 0.001527547836303711
当前epoch： 39


 42%|████▏     | 42/100 [00:04<00:05, 10.09it/s]

当前 epoch 的 Train loss: 0.0014570691855624318
当前epoch： 40
当前 epoch 的 Train loss: 0.0016104242531582713
当前epoch： 41
当前 epoch 的 Train loss: 0.001494801603257656
当前epoch： 42


 44%|████▍     | 44/100 [00:04<00:05, 10.12it/s]

当前 epoch 的 Train loss: 0.0013469094410538673
当前epoch： 43
当前 epoch 的 Train loss: 0.0013700516428798437
当前epoch： 44
当前 epoch 的 Train loss: 0.001347044249996543
当前epoch： 45


 48%|████▊     | 48/100 [00:04<00:05, 10.14it/s]

当前 epoch 的 Train loss: 0.0013221605913713574
当前epoch： 46
当前 epoch 的 Train loss: 0.001282671233639121
当前epoch： 47
当前 epoch 的 Train loss: 0.0011971204075962305
当前epoch： 48


 50%|█████     | 50/100 [00:05<00:04, 10.09it/s]

当前 epoch 的 Train loss: 0.0012012564111500978
当前epoch： 49
当前 epoch 的 Train loss: 0.0010470411507412791
当前epoch： 50
当前 epoch 的 Train loss: 0.0011939069954678416
当前epoch： 51


 54%|█████▍    | 54/100 [00:05<00:04, 10.09it/s]

当前 epoch 的 Train loss: 0.0011231484822928905
当前epoch： 52
当前 epoch 的 Train loss: 0.001151390722952783
当前epoch： 53
当前 epoch 的 Train loss: 0.0010284081799909472
当前epoch： 54


 56%|█████▌    | 56/100 [00:05<00:04, 10.10it/s]

当前 epoch 的 Train loss: 0.0010570340091362596
当前epoch： 55
当前 epoch 的 Train loss: 0.0009991604601964355
当前epoch： 56
当前 epoch 的 Train loss: 0.0011140885762870312
当前epoch： 57


 60%|██████    | 60/100 [00:06<00:03, 10.09it/s]

当前 epoch 的 Train loss: 0.0009600442135706544
当前epoch： 58
当前 epoch 的 Train loss: 0.0009533996344543993
当前epoch： 59
当前 epoch 的 Train loss: 0.000965066603384912
当前epoch： 60


 62%|██████▏   | 62/100 [00:06<00:03, 10.10it/s]

当前 epoch 的 Train loss: 0.0009390012128278613
当前epoch： 61
当前 epoch 的 Train loss: 0.0008783236844465137
当前epoch： 62
当前 epoch 的 Train loss: 0.000892146781552583
当前epoch： 63


 66%|██████▌   | 66/100 [00:06<00:03, 10.10it/s]

当前 epoch 的 Train loss: 0.0009626305545680225
当前epoch： 64
当前 epoch 的 Train loss: 0.0008599550928920507
当前epoch： 65
当前 epoch 的 Train loss: 0.0008910428150556982
当前epoch： 66


 68%|██████▊   | 68/100 [00:06<00:03, 10.12it/s]

当前 epoch 的 Train loss: 0.0009629726409912109
当前epoch： 67
当前 epoch 的 Train loss: 0.0008750324486754835
当前epoch： 68


 70%|███████   | 70/100 [00:07<00:02, 10.03it/s]

当前 epoch 的 Train loss: 0.0008571147918701172
当前epoch： 69
当前 epoch 的 Train loss: 0.0007865843363106251
当前epoch： 70
当前 epoch 的 Train loss: 0.0008033151389099658
当前epoch： 71


 74%|███████▍  | 74/100 [00:07<00:02, 10.10it/s]

当前 epoch 的 Train loss: 0.0008138729026541114
当前epoch： 72
当前 epoch 的 Train loss: 0.000833169266115874
当前epoch： 73
当前 epoch 的 Train loss: 0.0008171226945705712
当前epoch： 74


 76%|███████▌  | 76/100 [00:07<00:02, 10.10it/s]

当前 epoch 的 Train loss: 0.0007963076932355762
当前epoch： 75
当前 epoch 的 Train loss: 0.0008314588922075927
当前epoch： 76
当前 epoch 的 Train loss: 0.0008538909605704248
当前epoch： 77


 80%|████████  | 80/100 [00:08<00:01, 10.12it/s]

当前 epoch 的 Train loss: 0.0007562844548374414
当前epoch： 78
当前 epoch 的 Train loss: 0.0007897667237557471
当前epoch： 79
当前 epoch 的 Train loss: 0.000738081696908921
当前epoch： 80


 82%|████████▏ | 82/100 [00:08<00:01, 10.11it/s]

当前 epoch 的 Train loss: 0.0007813909905962646
当前epoch： 81
当前 epoch 的 Train loss: 0.000755714310798794
当前epoch： 82
当前 epoch 的 Train loss: 0.0007569115841761231
当前epoch： 83


 86%|████████▌ | 86/100 [00:08<00:01, 10.11it/s]

当前 epoch 的 Train loss: 0.0006623734370805323
当前epoch： 84
当前 epoch 的 Train loss: 0.0006632494041696191
当前epoch： 85
当前 epoch 的 Train loss: 0.000745887344237417
当前epoch： 86


 88%|████████▊ | 88/100 [00:08<00:01, 10.11it/s]

当前 epoch 的 Train loss: 0.0007252537761814892
当前epoch： 87
当前 epoch 的 Train loss: 0.000735604262445122
当前epoch： 88
当前 epoch 的 Train loss: 0.0006684738909825683
当前epoch： 89


 92%|█████████▏| 92/100 [00:09<00:00, 10.09it/s]

当前 epoch 的 Train loss: 0.0006985716172493994
当前epoch： 90
当前 epoch 的 Train loss: 0.0007236418314278126
当前epoch： 91
当前 epoch 的 Train loss: 0.0006414237432181835
当前epoch： 92


 94%|█████████▍| 94/100 [00:09<00:00, 10.07it/s]

当前 epoch 的 Train loss: 0.0006995097501203418
当前epoch： 93
当前 epoch 的 Train loss: 0.0006526190554723144
当前epoch： 94
当前 epoch 的 Train loss: 0.0006610103300772607
当前epoch： 95


 98%|█████████▊| 98/100 [00:09<00:00, 10.09it/s]

当前 epoch 的 Train loss: 0.0007120008231140673
当前epoch： 96
当前 epoch 的 Train loss: 0.0006561434711329639
当前epoch： 97
当前 epoch 的 Train loss: 0.0006414029630832374
当前epoch： 98


100%|██████████| 100/100 [00:10<00:00,  9.92it/s]

当前 epoch 的 Train loss: 0.0006339446408674121
当前epoch： 99
当前 epoch 的 Train loss: 0.0006514269625768065





## 验证

In [34]:
# Switch the model to evaluation mode.
model.eval()

# Zero the evaluation tracking variables.
eval_loss = eval_accuracy = 0
nb_eval_steps = nb_eval_examples = 0

In [35]:
# Run the model over the validation set batch by batch and measure token
# accuracy on the non-padding positions.
# After the loop, `outputs`, `b_input_ids`, `pred_flat` and `label_ids` hold
# the values of the LAST batch, for inspection in later cells.
correct_tokens, total_tokens = 0, 0
for batch in tqdm(validation_dataloader):
    # Move the batch to the selected device and unpack it.
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch
    # Inference only: no gradients needed.
    with torch.no_grad():
        # token_type_ids / position_ids are left as None, so the model falls
        # back to its defaults for them.
        outputs = model(input_ids=b_input_ids,
                        attention_mask=b_input_mask,
                        token_type_ids=None,
                        position_ids=None)

    # Without labels, the first output holds the per-token scores per tag.
    scores = outputs[0].detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()  # true labels, one row per example
    # Predict the best tag for EVERY example in the batch. The previous
    # version indexed scores[0] and silently dropped all but the first example.
    batch_preds = np.argmax(scores, axis=-1)
    pred_flat = batch_preds.flatten()

    # Count correct predictions where the attention mask is set (real tokens,
    # including the special tokens), ignoring padding.
    real_tokens = b_input_mask.to('cpu').numpy() > 0
    correct_tokens += int((batch_preds[real_tokens] == label_ids[real_tokens]).sum())
    total_tokens += int(real_tokens.sum())

# Guard against an empty validation set, which would divide by zero.
if total_tokens:
    eval_accuracy = correct_tokens / total_tokens
    print(f"Validation token accuracy: {eval_accuracy}")

100%|██████████| 1/1 [00:00<00:00, 90.81it/s]


In [36]:
pred_flat  # 预测值

array([4, 3, 3, 0, 2, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [37]:
label_ids  # 真实值

array([[4, 3, 3, 0, 2, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3]])

In [38]:
# 这句话
test_tokens = b_input_ids[0].cpu().numpy()
tokenizer.decode(test_tokens)

'[CLS] 今 年 北 京 的 空 气 不 太 好 [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [39]:
outputs

(tensor([[[-1.6728, -1.8235, -2.1418, -1.5861,  7.7300, -1.4466, -1.7780],
          [-1.1624, -2.2449, -1.6740,  8.8995, -1.0606, -1.5574, -1.6191],
          [-1.4291, -2.3913, -1.3284,  8.8738, -1.1176, -1.4979, -1.6979],
          [ 7.2947, -0.5525, -0.7209, -1.0707, -1.4258, -1.8513, -1.9203],
          [-0.8750, -0.3866,  8.1095, -1.4140, -1.3495, -1.5834, -1.5541],
          [-1.3535, -2.2559, -1.4714,  8.9831, -0.8705, -1.4904, -1.5459],
          [-1.1316, -2.1854, -1.4373,  8.8713, -1.1364, -1.3050, -1.6238],
          [-1.4988, -2.1450, -1.2738,  8.9729, -1.0865, -1.3208, -1.6016],
          [-1.3457, -2.1436, -1.5477,  9.0416, -0.9512, -1.3986, -1.6326],
          [-1.5001, -2.0713, -1.5804,  9.0707, -0.9897, -1.3081, -1.5980],
          [-1.5735, -2.1474, -1.4551,  9.0090, -0.8773, -1.4328, -1.6618],
          [-1.4617, -1.2341, -1.1523, -1.5837, -0.5416,  8.2302, -1.1377],
          [-1.1837, -2.4602, -1.2300,  7.2131,  0.5483, -1.3806, -1.7415],
          [-1.1510, -2.43