In [2]:
# basic
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
from tqdm import tqdm
import sklearn

# np/pd
import numpy as np
import pandas as pd

# torch
import torch
import torchtext
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# transformer
from datasets import load_dataset

# CRF
from torchcrf import CRF

In [3]:
from torchtext.vocab import GloVe

global_vectors = GloVe(name='840B', dim=300, cache='../../data/Glove840B300d/')

In [29]:
print(global_vectors.get_vecs_by_tokens(['the','world','will','be','better']).shape)

torch.Size([5, 300])


In [30]:
torch.cuda.is_available()

True

In [129]:
Config = {
    'num_tags':9,
    'num_layers':1,
    'embedding_dim':300,
    'vocab_size':30289,
    'hidden_dim':100,
    'batch_size':32
}

## 探查conll2003数据

定义dataset，dataLoader

In [34]:
# data_udpos = torchtext.datasets.UDPOS(root='./torchtext_datasets_udpos/', split=('train','valid','test'))
dataset_conll2003 = load_dataset("conll2003")

Reusing dataset conll2003 (/root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98)


  0%|          | 0/3 [00:00<?, ?it/s]

In [35]:
dataset_conll2003['test'][0]

{'id': '0',
 'tokens': ['SOCCER',
  '-',
  'JAPAN',
  'GET',
  'LUCKY',
  'WIN',
  ',',
  'CHINA',
  'IN',
  'SURPRISE',
  'DEFEAT',
  '.'],
 'pos_tags': [21, 8, 22, 37, 22, 22, 6, 22, 15, 12, 21, 7],
 'chunk_tags': [11, 0, 11, 21, 11, 12, 0, 11, 13, 11, 12, 0],
 'ner_tags': [0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0, 0]}

In [36]:
ner_tag2id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}

ner_id2tag = {}
for key in ner_tag2id.keys():
    ner_id2tag[ner_tag2id[key]] = key

In [37]:
ner_id2tag

{0: 'O',
 1: 'B-PER',
 2: 'I-PER',
 3: 'B-ORG',
 4: 'I-ORG',
 5: 'B-LOC',
 6: 'I-LOC',
 7: 'B-MISC',
 8: 'I-MISC'}

In [38]:
def ner_id_to_tags(ner_id_seq):
    res1, res2 = [], []
    for ner_id in ner_id_seq:
        res1.append(ner_id2tag.get(ner_id, ''))
        res2.append(ner_id2tag.get(ner_id, '-').split('-')[-1])
    return res1, res2

ner_id_to_tags(dataset_conll2003['train'][0]['ner_tags'])

(['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O'],
 ['ORG', 'O', 'MISC', 'O', 'O', 'O', 'MISC', 'O', 'O'])

In [39]:
m = 0
for k in dataset_conll2003.keys():
    for i in dataset_conll2003[k]['tokens']:
        m = max(m, len(i))
print('max length = {}'.format(m))

max length = 124


In [40]:
word_to_ix = {}
for k in dataset_conll2003.keys():
    for tokens in dataset_conll2003[k]['tokens']:
        for token in tokens:
            if token not in word_to_ix:
                word_to_ix[token] = len(word_to_ix)
                
Config['vocab_size'] = len(word_to_ix)

print(Config['vocab_size'])

30289


In [42]:
def func_word2ix(word_to_ix, token_list):
    res = list()
    for token in token_list:
        res.append(word_to_ix.get(token, len(word_to_ix)+1))
    return {'token_ids':res}

In [43]:
dataset_conll2003_train=dataset_conll2003['train'].map(lambda x: func_word2ix(word_to_ix, x['tokens']))
dataset_conll2003_train.set_format(type="torch", columns=['token_ids','ner_tags'])

dataset_conll2003_test=dataset_conll2003['test'].map(lambda x: func_word2ix(word_to_ix, x['tokens']))
dataset_conll2003_test.set_format(type="torch", columns=['token_ids','ner_tags'])

dataset_conll2003_val=dataset_conll2003['validation'].map(lambda x: func_word2ix(word_to_ix, x['tokens']))
dataset_conll2003_val.set_format(type="torch", columns=['token_ids','ner_tags'])

Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-e993f22e7c8f1977.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-9da2d1c0e9528511.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-586edb6c12fcdba7.arrow


In [44]:
class MyDataset(Dataset):

    def __init__(self, data):
        self.data = data
        self.token_ids = self.data['token_ids'] # 在这变成torch.tensor，但长度不同
        self.ner_tags = self.data['ner_tags']
        self.tokens = self.data['tokens']
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
#         curr = dict()
#         curr['token_ids'] = self.token_ids
#         curr['ner_tags'] = self.ner_tags
        return self.token_ids[index], self.ner_tags[index], self.tokens[index]


def collate_fn_padd(batch):
    '''
    Padds batch of variable length

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    x, y, z = zip(*batch)
    x_lens = [len(x_i) for x_i in x]
    y_lens = [len(y_i) for y_i in y]
    x_pad = torch.nn.utils.rnn.pad_sequence(x, batch_first=True)
    y_pad = torch.nn.utils.rnn.pad_sequence(y, batch_first=True)
    return x_pad, torch.tensor(x_lens), y_pad, torch.tensor(y_lens), z
    
dataset_conll2003_train_loader = DataLoader(
    MyDataset(dataset_conll2003_train),
    batch_size=32,
    shuffle=True, 
    collate_fn=lambda x: collate_fn_padd(x))


dataset_conll2003_test_loader = DataLoader(
    MyDataset(dataset_conll2003_test),
    batch_size=32,
    shuffle=True, 
    collate_fn=lambda x: collate_fn_padd(x))

In [45]:
next(iter(dataset_conll2003_train_loader))[4][0]

['SEOUL', '1996-08-25']

In [46]:
print(dataset_conll2003_train_loader.dataset.__len__(), dataset_conll2003_test_loader.dataset.__len__())

14041 3453


## token-emb

In [100]:
glove_weight = global_vectors.get_vecs_by_tokens(list(word_to_ix.keys()))
embedding_glove = nn.Embedding.from_pretrained(glove_weight)

In [102]:
(1-glove_weight[0]==embedding_glove(torch.LongTensor([0]))).sum()

tensor(0)

## 模型定义

In [117]:
class GLove_BiLSTM(nn.Module):

    def __init__(self, config=None, embedding_weight = None):
        super(GLove_BiLSTM, self).__init__()
        self.config = config

        # BiLSTM-model 给 emission 层定义参数
        self.embedding_dim = self.config.get('embedding_dim', 300)
        self.hidden_dim = self.config.get('hidden_dim', 200)
        self.vocab_size = self.config.get('vocab_size', 30289)

        if embedding_weight is not None:
            self.word_embeds = nn.Embedding.from_pretrained(embedding_weight)
        else:
            self.word_embeds = nn.Embedding(self.vocab_size, self.embedding_dim)            
            
        self.target_size = self.config.get('num_tags', 9)
        self.num_layers = self.config.get('num_layers',1)
        self.batch_size = self.config.get('batch_size',16)
        self.bidirectional = True

        # lstm
        self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_dim//2,
                            num_layers=self.num_layers, bidirectional=self.bidirectional)
        self.hidden2tag1 = nn.Linear(self.hidden_dim, self.target_size*3)
        self.hidden2tag2 = nn.Linear(self.target_size*3, self.target_size)
        self.dropout010 = nn.Dropout(0.1)
        self.dropout020 = nn.Dropout(0.2)
#         self.hidden_init = self.init_hidden()

        # CRF-model
        self.crf = CRF(self.config.get('num_tags', 9), batch_first=True)

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers*2 if self.bidirectional else self.num_layers,
                             self.batch_size, self.hidden_dim//2)
        cell = torch.zeros(self.num_layers*2 if self.bidirectional else self.num_layers,
                             self.batch_size, self.hidden_dim//2)
        return hidden, cell

    def forward(self, sent, sent_len):
        """

        :param sent: 输入的已转换为token_id的句子，(batch_len * sent_len * token_emb_len)
        :param sent_len: tensor(list(int))
        :return:
        """
        embeds = self.word_embeds(sent)
        embed_packed = pack_padded_sequence(embeds, lengths=sent_len.to('cpu'),
                                            batch_first=True,
                                            enforce_sorted=False)
        lstm_out, (hidden, cell) = self.lstm(embed_packed) #, self.hidden_init)
        lstm_out, lens = pad_packed_sequence(lstm_out, batch_first=True)
        tag_score = self.hidden2tag1(lstm_out)
        tag_score = self.dropout010(tag_score)
        tag_score = self.hidden2tag2(tag_score)
#         tag_score = nn.functional.softmax(tag_score, dim=-1)
        return tag_score

In [220]:
model_name = 'V2-Emb=Glove300-bilstmLayer=1Hidden=200Dropout0.1Batch=32Learn=1e-1'
model_lstm = GLove_BiLSTM(config=Config, embedding_weight=glove_weight)

In [221]:
model_lstm = model_lstm.cuda()

In [222]:
model_lstm.parameters

<bound method Module.parameters of GLove_BiLSTM(
  (word_embeds): Embedding(30289, 300)
  (lstm): LSTM(300, 50, bidirectional=True)
  (hidden2tag1): Linear(in_features=100, out_features=27, bias=True)
  (hidden2tag2): Linear(in_features=27, out_features=9, bias=True)
  (dropout010): Dropout(p=0.1, inplace=False)
  (dropout020): Dropout(p=0.2, inplace=False)
  (crf): CRF(num_tags=9)
)>

In [223]:
# 手动计算验证权重
import collections
ner_tags_all = torch.cat(dataset_conll2003_train['ner_tags'])
t=collections.Counter(ner_tags_all.numpy())
res = []
for k in t:
    res.append((k, len(ner_tags_all)/9/t[k]))
print(res)

[(3, 3.5792683998664065), (0, 0.1334168085220698), (7, 6.580731691551936), (1, 3.4279629629629627), (2, 4.996589124460149), (5, 3.1687052598817305), (4, 6.108141348692104), (8, 19.58835978835979), (6, 19.554499183712664)]


In [224]:
class_weights=sklearn.utils.class_weight.compute_class_weight(
    class_weight='balanced',classes=np.unique(ner_tags_all),y=ner_tags_all.numpy())
class_weights=torch.tensor(class_weights,dtype=torch.float)
class_weights

tensor([ 0.1334,  3.4280,  4.9966,  3.5793,  6.1081,  3.1687, 19.5545,  6.5807,
        19.5884])

In [225]:
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=1e-1)
loss_fn = nn.CrossEntropyLoss(reduction='mean', weight=class_weights.cuda()) 
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [226]:
def train_eval_single_epoch(model, data_iter=None, optimizer=None, loss_fn=None, is_train=False):
    if is_train:
        model.train()
    else:
        model.eval()   
    correct_curr, correct_sum, loss_sum, loss_curr = 0, 0, 0, 0
    loss_list, accuracy_list = [], []
    print('Total (training) batch = {}'.format(len(data_iter)))
    batch_i = 0
    data_iter_len = data_iter.dataset.__len__() # total sample num.
    batch_num = len(data_iter)
    
    batch_loss_list = list()
    logits_list = list()
    y_list, y_len_list = [], []
    for batch_data in data_iter:
        torch.cuda.empty_cache()
        batch_i += 1
        if is_train:
            optimizer.zero_grad()
#             model.zero_grad()
        x, x_len, y, y_len, _ = batch_data
        x = x.cuda()
        x_len = x_len.cuda()
        y = y.cuda()
        # model predict 
        logits = model(x, x_len)
        assert logits.shape[0:2]==x.shape[0:2]
        # compute loss 
        batch_loss = 0
        for i in range(logits.size(0)): # num of samples in one batch
            loss = loss_fn(logits[i], y[i])
            batch_loss += loss 
        
        batch_loss /= logits.size(0)
        if is_train:
            batch_loss.backward()
            optimizer.step()
        
        # 记录
        batch_loss_list.append(batch_loss.item())
        logits_list.append(logits)
        y_list.append(y)
        y_len_list.append(y_len)
#         if batch_i%100==0:
#             print(batch_loss.item())
        
        x = x.cpu()
        x_len = x_len.cpu()
        y = y.cpu()
        
#     print(sum(batch_loss_list)/batch_num)
    return batch_loss_list, logits_list, y_list, y_len_list

In [227]:
def func_eval(model, data_iter=None, loss_fn=None):
    model.eval()
    correct_curr, correct_sum, loss_sum, loss_curr = 0, 0, 0, 0
    loss_list, accuracy_list = [], []
    print('Total (training) batch = {}'.format(len(data_iter)))
    batch_i = 0
    data_iter_len = data_iter.dataset.__len__() # total sample num.
    batch_num = len(data_iter)
    
    batch_loss_list = list()
    logits_list = list()
    y_list, y_len_list = [], []
    for batch_data in data_iter:
        batch_i += 1
        x, x_len, y, y_len,_ = batch_data
        x = x.cuda()
        x_len = x_len.cuda()
        y = y.cuda()
        # model predict 
        logits = model(x, x_len)
        assert logits.shape[0:2]==x.shape[0:2]
        # compute loss 
        batch_loss = 0
        for i in range(logits.size(0)): # num of samples in one batch
            loss = loss_fn(logits[i], y[i])
            batch_loss += loss 
        
        batch_loss /= logits.size(0)
        
        # 记录
        batch_loss_list.append(batch_loss.item())
        logits_list.append(logits)
        y_list.append(y)
        y_len_list.append(y_len)
#         if batch_i%100==0:
#             print(batch_loss.item())
        
        x = x.cpu()
        x_len = x_len.cpu()
        y = y.cpu()
        
#     print(sum(batch_loss_list)/batch_num)
    return batch_loss_list, logits_list, y_list, y_len_list

## 训练开始

In [228]:
precision_test_max, precision_test_curr = 0, 0

for epoch in tqdm(range(50)):
    print('='*50)
    print('Epoch = {}'.format(epoch))
    print('='*50)
    batch_loss_list, logits_list, y_list, y_len_list = train_eval_single_epoch(model_lstm, 
                                                                               dataset_conll2003_train_loader, 
                                                                               optimizer=optimizer,
                                                                               loss_fn=loss_fn,
                                                                               is_train=True)
    scheduler.step() # 加上后好一些
    print('training loss = {:.8f}'.format(sum(batch_loss_list)/len(batch_loss_list)))
    test_batch_loss_list, test_logits_list, test_y_list, test_y_len_list = func_eval(model_lstm, 
                                                                               dataset_conll2003_test_loader, 
                                                                               loss_fn=loss_fn)
    print('testing loss = {:.8f}'.format(sum(test_batch_loss_list)/len(test_batch_loss_list)))
    
    # 评估
    train_dict = MyTools.func_cal_accu_recall(logits_list=logits_list, y_list=y_list, y_len_list=y_len_list)
    test_dict = MyTools.func_cal_accu_recall(logits_list=test_logits_list, y_list=test_y_list, 
                                             y_len_list=test_y_len_list)
    
    print('Train metrics')
    train_metric = MyTools.func_cal_metrics(train_dict)
    print('Test metrics')
    test_metric = MyTools.func_cal_metrics(test_dict)
    
    # save model if current f1 rate is better than previous ones
    recall_test_curr, precision_test_curr, accuracy_test_curr = test_metric
    if precision_test_curr > precision_test_max:
        torch.save(model_lstm.state_dict(), './models/model_v2_'+model_name+'epoch='+str(epoch)+
                   'accu='+str(round(accuracy_test_curr,4))+
                   'recall='+str(round(recall_test_curr,4))+
                   'precision='+str(round(precision_test_curr,4)))
    precision_test_max = max(precision_test_curr, precision_test_max)

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch = 0
Total (training) batch = 439
training loss = 0.64664103
Total (training) batch = 108
testing loss = 0.62670536


  assert sum(mask.sum(axis=1) == y_len_list[logit_i]) // batch_len == 1
  2%|▏         | 1/50 [01:14<1:00:50, 74.49s/it]

Train metrics
recall = 67.98%, precision = 59.16%, accuracy = 86.80%
Test metrics
recall = 66.32%, precision = 62.99%, accuracy = 87.31%
Epoch = 1
Total (training) batch = 439
training loss = 0.46238990
Total (training) batch = 108
testing loss = 0.45559920


  4%|▍         | 2/50 [02:34<1:02:09, 77.69s/it]

Train metrics
recall = 76.53%, precision = 66.94%, accuracy = 89.76%
Test metrics
recall = 77.56%, precision = 66.85%, accuracy = 89.36%
Epoch = 2
Total (training) batch = 439
training loss = 0.37181192
Total (training) batch = 108
testing loss = 0.48400327


  6%|▌         | 3/50 [04:26<1:13:04, 93.28s/it]

Train metrics
recall = 80.44%, precision = 70.85%, accuracy = 91.20%
Test metrics
recall = 78.90%, precision = 68.57%, accuracy = 89.99%
Epoch = 3
Total (training) batch = 439
training loss = 0.33776038
Total (training) batch = 108
testing loss = 0.44892061


  8%|▊         | 4/50 [05:44<1:07:05, 87.50s/it]

Train metrics
recall = 82.58%, precision = 73.46%, accuracy = 92.10%
Test metrics
recall = 77.19%, precision = 71.30%, accuracy = 90.59%
Epoch = 4
Total (training) batch = 439
training loss = 0.28626353
Total (training) batch = 108
testing loss = 0.39135029


 10%|█         | 5/50 [07:03<1:03:12, 84.27s/it]

Train metrics
recall = 84.48%, precision = 75.71%, accuracy = 92.87%
Test metrics
recall = 80.70%, precision = 73.87%, accuracy = 91.64%
Epoch = 5
Total (training) batch = 439
training loss = 0.25619031
Total (training) batch = 108
testing loss = 0.47189575


 12%|█▏        | 6/50 [08:22<1:00:29, 82.48s/it]

Train metrics
recall = 86.37%, precision = 77.10%, accuracy = 93.43%
Test metrics
recall = 79.50%, precision = 72.15%, accuracy = 91.06%
Epoch = 6
Total (training) batch = 439
training loss = 0.23247127
Total (training) batch = 108
testing loss = 0.46635179


 14%|█▍        | 7/50 [09:40<57:58, 80.89s/it]  

Train metrics
recall = 87.46%, precision = 78.36%, accuracy = 93.87%
Test metrics
recall = 80.56%, precision = 70.40%, accuracy = 90.69%
Epoch = 7
Total (training) batch = 439
training loss = 0.20809575
Total (training) batch = 108
testing loss = 0.42242485


 16%|█▌        | 8/50 [10:57<55:55, 79.89s/it]

Train metrics
recall = 88.29%, precision = 79.53%, accuracy = 94.24%
Test metrics
recall = 81.19%, precision = 77.90%, accuracy = 92.69%
Epoch = 8
Total (training) batch = 439
training loss = 0.18775487
Total (training) batch = 108
testing loss = 0.40796615


 18%|█▊        | 9/50 [12:15<54:01, 79.06s/it]

Train metrics
recall = 89.29%, precision = 80.30%, accuracy = 94.55%
Test metrics
recall = 83.63%, precision = 73.55%, accuracy = 91.89%
Epoch = 9
Total (training) batch = 439
training loss = 0.16998234
Total (training) batch = 108
testing loss = 0.39200763


 20%|██        | 10/50 [13:31<52:09, 78.24s/it]

Train metrics
recall = 90.20%, precision = 81.01%, accuracy = 94.83%
Test metrics
recall = 83.26%, precision = 71.61%, accuracy = 91.31%
Epoch = 10
Total (training) batch = 439
training loss = 0.14467941
Total (training) batch = 108
testing loss = 0.39120234


 22%|██▏       | 11/50 [14:49<50:49, 78.18s/it]

Train metrics
recall = 91.59%, precision = 83.16%, accuracy = 95.49%
Test metrics
recall = 84.17%, precision = 73.51%, accuracy = 91.94%
Epoch = 11
Total (training) batch = 439
training loss = 0.13406960
Total (training) batch = 108
testing loss = 0.41049089


 24%|██▍       | 12/50 [16:07<49:27, 78.10s/it]

Train metrics
recall = 92.26%, precision = 83.69%, accuracy = 95.70%
Test metrics
recall = 84.07%, precision = 76.91%, accuracy = 92.81%
Epoch = 12
Total (training) batch = 439
training loss = 0.12039667
Total (training) batch = 108
testing loss = 0.39696844


 26%|██▌       | 13/50 [17:24<48:03, 77.93s/it]

Train metrics
recall = 92.92%, precision = 84.39%, accuracy = 95.94%
Test metrics
recall = 84.50%, precision = 81.66%, accuracy = 93.98%
Epoch = 13
Total (training) batch = 439
training loss = 0.10675489
Total (training) batch = 108
testing loss = 0.39240250


 28%|██▊       | 14/50 [18:44<47:05, 78.49s/it]

Train metrics
recall = 93.59%, precision = 85.46%, accuracy = 96.27%
Test metrics
recall = 85.18%, precision = 86.03%, accuracy = 95.00%
Epoch = 14
Total (training) batch = 439
training loss = 0.09331839
Total (training) batch = 108
testing loss = 0.42473321


 30%|███       | 15/50 [20:02<45:43, 78.39s/it]

Train metrics
recall = 94.47%, precision = 86.03%, accuracy = 96.51%
Test metrics
recall = 85.37%, precision = 80.39%, accuracy = 93.81%
Epoch = 15
Total (training) batch = 439
training loss = 0.08489478
Total (training) batch = 108
testing loss = 0.40222281


 32%|███▏      | 16/50 [21:21<44:28, 78.49s/it]

Train metrics
recall = 94.86%, precision = 86.90%, accuracy = 96.75%
Test metrics
recall = 86.39%, precision = 80.17%, accuracy = 93.89%
Epoch = 16
Total (training) batch = 439
training loss = 0.07667232
Total (training) batch = 108
testing loss = 0.42810756


 34%|███▍      | 17/50 [22:42<43:32, 79.16s/it]

Train metrics
recall = 95.34%, precision = 87.75%, accuracy = 97.00%
Test metrics
recall = 85.39%, precision = 80.29%, accuracy = 93.79%
Epoch = 17
Total (training) batch = 439
training loss = 0.06986865
Total (training) batch = 108
testing loss = 0.44154893


 36%|███▌      | 18/50 [24:03<42:32, 79.76s/it]

Train metrics
recall = 95.61%, precision = 87.95%, accuracy = 97.07%
Test metrics
recall = 85.66%, precision = 81.69%, accuracy = 94.14%
Epoch = 18
Total (training) batch = 439
training loss = 0.06533102
Total (training) batch = 108
testing loss = 0.44681665


 38%|███▊      | 19/50 [25:24<41:24, 80.13s/it]

Train metrics
recall = 96.11%, precision = 88.53%, accuracy = 97.27%
Test metrics
recall = 85.56%, precision = 81.18%, accuracy = 94.01%
Epoch = 19
Total (training) batch = 439
training loss = 0.05880184
Total (training) batch = 108
testing loss = 0.45055562


 40%|████      | 20/50 [26:44<40:05, 80.17s/it]

Train metrics
recall = 96.59%, precision = 89.60%, accuracy = 97.55%
Test metrics
recall = 85.33%, precision = 83.33%, accuracy = 94.45%
Epoch = 20
Total (training) batch = 439
training loss = 0.05520717
Total (training) batch = 108
testing loss = 0.45010391


 42%|████▏     | 21/50 [28:02<38:27, 79.58s/it]

Train metrics
recall = 96.62%, precision = 89.76%, accuracy = 97.59%
Test metrics
recall = 86.17%, precision = 82.70%, accuracy = 94.44%
Epoch = 21
Total (training) batch = 439
training loss = 0.05183696
Total (training) batch = 108
testing loss = 0.46521300


 44%|████▍     | 22/50 [29:21<37:00, 79.29s/it]

Train metrics
recall = 96.98%, precision = 90.35%, accuracy = 97.76%
Test metrics
recall = 85.36%, precision = 83.85%, accuracy = 94.57%
Epoch = 22
Total (training) batch = 439
training loss = 0.04795017
Total (training) batch = 108
testing loss = 0.46559654


 46%|████▌     | 23/50 [30:40<35:36, 79.13s/it]

Train metrics
recall = 97.13%, precision = 90.75%, accuracy = 97.86%
Test metrics
recall = 85.32%, precision = 82.04%, accuracy = 94.17%
Epoch = 23
Total (training) batch = 439
training loss = 0.04543529
Total (training) batch = 108
testing loss = 0.48102297


 48%|████▊     | 24/50 [31:58<34:11, 78.92s/it]

Train metrics
recall = 97.33%, precision = 91.23%, accuracy = 97.99%
Test metrics
recall = 85.42%, precision = 84.13%, accuracy = 94.64%
Epoch = 24
Total (training) batch = 439
training loss = 0.04250730
Total (training) batch = 108
testing loss = 0.49590592


 50%|█████     | 25/50 [33:17<32:49, 78.79s/it]

Train metrics
recall = 97.49%, precision = 91.58%, accuracy = 98.08%
Test metrics
recall = 85.24%, precision = 85.42%, accuracy = 94.88%
Epoch = 25
Total (training) batch = 439
training loss = 0.04000681
Total (training) batch = 108
testing loss = 0.51627618


 52%|█████▏    | 26/50 [34:36<31:31, 78.80s/it]

Train metrics
recall = 97.62%, precision = 91.81%, accuracy = 98.15%
Test metrics
recall = 85.19%, precision = 85.45%, accuracy = 94.88%
Epoch = 26
Total (training) batch = 439
training loss = 0.03922533
Total (training) batch = 108
testing loss = 0.52355871


 54%|█████▍    | 27/50 [35:54<30:09, 78.69s/it]

Train metrics
recall = 97.80%, precision = 91.84%, accuracy = 98.18%
Test metrics
recall = 85.17%, precision = 85.20%, accuracy = 94.83%
Epoch = 27
Total (training) batch = 439
training loss = 0.03758795
Total (training) batch = 108
testing loss = 0.52684869


 56%|█████▌    | 28/50 [37:12<28:48, 78.55s/it]

Train metrics
recall = 97.81%, precision = 92.27%, accuracy = 98.26%
Test metrics
recall = 84.59%, precision = 83.57%, accuracy = 94.40%
Epoch = 28
Total (training) batch = 439
training loss = 0.03588254
Total (training) batch = 108
testing loss = 0.52800783


 58%|█████▊    | 29/50 [38:30<27:27, 78.43s/it]

Train metrics
recall = 97.83%, precision = 92.33%, accuracy = 98.28%
Test metrics
recall = 85.21%, precision = 85.36%, accuracy = 94.86%
Epoch = 29
Total (training) batch = 439
training loss = 0.03514667
Total (training) batch = 108
testing loss = 0.51880845


 60%|██████    | 30/50 [39:48<26:01, 78.08s/it]

Train metrics
recall = 97.94%, precision = 92.60%, accuracy = 98.35%
Test metrics
recall = 85.28%, precision = 85.12%, accuracy = 94.83%
Epoch = 30
Total (training) batch = 439
training loss = 0.03276230
Total (training) batch = 108
testing loss = 0.53211197


 62%|██████▏   | 31/50 [41:06<24:46, 78.24s/it]

Train metrics
recall = 98.05%, precision = 92.92%, accuracy = 98.43%
Test metrics
recall = 85.23%, precision = 83.94%, accuracy = 94.57%
Epoch = 31
Total (training) batch = 439
training loss = 0.03155658
Total (training) batch = 108
testing loss = 0.54606416


 64%|██████▍   | 32/50 [42:24<23:26, 78.12s/it]

Train metrics
recall = 98.10%, precision = 92.96%, accuracy = 98.44%
Test metrics
recall = 84.94%, precision = 84.34%, accuracy = 94.61%
Epoch = 32
Total (training) batch = 439
training loss = 0.03094440
Total (training) batch = 108
testing loss = 0.54473807


 66%|██████▌   | 33/50 [43:43<22:09, 78.23s/it]

Train metrics
recall = 98.13%, precision = 93.28%, accuracy = 98.51%
Test metrics
recall = 84.89%, precision = 84.79%, accuracy = 94.70%
Epoch = 33
Total (training) batch = 439
training loss = 0.02986412
Total (training) batch = 108
testing loss = 0.55372753


 68%|██████▊   | 34/50 [45:01<20:52, 78.27s/it]

Train metrics
recall = 98.26%, precision = 93.50%, accuracy = 98.57%
Test metrics
recall = 84.86%, precision = 84.97%, accuracy = 94.73%
Epoch = 34
Total (training) batch = 439
training loss = 0.02914496
Total (training) batch = 108
testing loss = 0.55465478


 70%|███████   | 35/50 [46:19<19:34, 78.33s/it]

Train metrics
recall = 98.24%, precision = 93.48%, accuracy = 98.56%
Test metrics
recall = 84.89%, precision = 85.56%, accuracy = 94.86%
Epoch = 35
Total (training) batch = 439
training loss = 0.02840826
Total (training) batch = 108
testing loss = 0.56565432


 72%|███████▏  | 36/50 [47:37<18:15, 78.23s/it]

Train metrics
recall = 98.31%, precision = 93.64%, accuracy = 98.60%
Test metrics
recall = 85.15%, precision = 85.38%, accuracy = 94.86%
Epoch = 36
Total (training) batch = 439
training loss = 0.02752865
Total (training) batch = 108
testing loss = 0.56520050


 74%|███████▍  | 37/50 [48:56<16:58, 78.37s/it]

Train metrics
recall = 98.36%, precision = 93.93%, accuracy = 98.66%
Test metrics
recall = 84.80%, precision = 85.14%, accuracy = 94.76%
Epoch = 37
Total (training) batch = 439
training loss = 0.02631487
Total (training) batch = 108
testing loss = 0.57705844


 76%|███████▌  | 38/50 [50:15<15:43, 78.64s/it]

Train metrics
recall = 98.43%, precision = 94.07%, accuracy = 98.70%
Test metrics
recall = 84.92%, precision = 85.04%, accuracy = 94.76%
Epoch = 38
Total (training) batch = 439
training loss = 0.02651458
Total (training) batch = 108
testing loss = 0.58233475


 78%|███████▊  | 39/50 [51:35<14:27, 78.83s/it]

Train metrics
recall = 98.41%, precision = 93.87%, accuracy = 98.66%
Test metrics
recall = 84.69%, precision = 85.37%, accuracy = 94.79%
Epoch = 39
Total (training) batch = 439
training loss = 0.02542383
Total (training) batch = 108
testing loss = 0.57470455


 80%|████████  | 40/50 [52:53<13:06, 78.69s/it]

Train metrics
recall = 98.47%, precision = 94.20%, accuracy = 98.73%
Test metrics
recall = 85.00%, precision = 85.32%, accuracy = 94.83%
Epoch = 40
Total (training) batch = 439
training loss = 0.02506385
Total (training) batch = 108
testing loss = 0.58952046


 82%|████████▏ | 41/50 [54:12<11:47, 78.64s/it]

Train metrics
recall = 98.47%, precision = 94.18%, accuracy = 98.73%
Test metrics
recall = 84.60%, precision = 86.23%, accuracy = 94.95%
Epoch = 41
Total (training) batch = 439
training loss = 0.02533092
Total (training) batch = 108
testing loss = 0.58582595


 84%|████████▍ | 42/50 [55:30<10:28, 78.53s/it]

Train metrics
recall = 98.52%, precision = 94.41%, accuracy = 98.78%
Test metrics
recall = 84.57%, precision = 85.68%, accuracy = 94.83%
Epoch = 42
Total (training) batch = 439
training loss = 0.02494000
Total (training) batch = 108
testing loss = 0.59071458


 86%|████████▌ | 43/50 [56:48<09:09, 78.49s/it]

Train metrics
recall = 98.55%, precision = 94.34%, accuracy = 98.77%
Test metrics
recall = 84.70%, precision = 86.41%, accuracy = 95.00%
Epoch = 43
Total (training) batch = 439
training loss = 0.02422787
Total (training) batch = 108
testing loss = 0.59459202


 88%|████████▊ | 44/50 [58:07<07:51, 78.52s/it]

Train metrics
recall = 98.58%, precision = 94.61%, accuracy = 98.82%
Test metrics
recall = 84.78%, precision = 86.29%, accuracy = 94.99%
Epoch = 44
Total (training) batch = 439
training loss = 0.02351872
Total (training) batch = 108
testing loss = 0.59768528


 90%|█████████ | 45/50 [59:24<06:30, 78.11s/it]

Train metrics
recall = 98.62%, precision = 94.67%, accuracy = 98.84%
Test metrics
recall = 84.75%, precision = 86.06%, accuracy = 94.94%
Epoch = 45
Total (training) batch = 439
training loss = 0.02389830
Total (training) batch = 108
testing loss = 0.59969714


 92%|█████████▏| 46/50 [1:00:05<04:27, 66.87s/it]

Train metrics
recall = 98.60%, precision = 94.62%, accuracy = 98.83%
Test metrics
recall = 84.86%, precision = 85.87%, accuracy = 94.92%
Epoch = 46
Total (training) batch = 439
training loss = 0.02344542
Total (training) batch = 108
testing loss = 0.60506356


 94%|█████████▍| 47/50 [1:00:17<02:31, 50.59s/it]

Train metrics
recall = 98.60%, precision = 94.58%, accuracy = 98.82%
Test metrics
recall = 84.64%, precision = 86.03%, accuracy = 94.92%
Epoch = 47
Total (training) batch = 439
training loss = 0.02295291
Total (training) batch = 108
testing loss = 0.59968222


 96%|█████████▌| 48/50 [1:00:30<01:18, 39.39s/it]

Train metrics
recall = 98.68%, precision = 94.65%, accuracy = 98.85%
Test metrics
recall = 84.63%, precision = 86.18%, accuracy = 94.94%
Epoch = 48
Total (training) batch = 439
training loss = 0.02253207
Total (training) batch = 108
testing loss = 0.60300491


 98%|█████████▊| 49/50 [1:00:44<00:31, 31.68s/it]

Train metrics
recall = 98.65%, precision = 94.67%, accuracy = 98.85%
Test metrics
recall = 84.69%, precision = 86.44%, accuracy = 95.00%
Epoch = 49
Total (training) batch = 439
training loss = 0.02259921
Total (training) batch = 108
testing loss = 0.60232710


100%|██████████| 50/50 [1:00:58<00:00, 73.17s/it]

Train metrics
recall = 98.70%, precision = 94.87%, accuracy = 98.89%
Test metrics
recall = 84.66%, precision = 86.14%, accuracy = 94.94%





## 训练总结

加载预训练emb果然是效果突出

Epoch = 1
Train metrics
recall = 67.98%, precision = 59.16%, accuracy = 86.80%
Test metrics
recall = 66.32%, precision = 62.99%, accuracy = 87.31%

Epoch = 49
Train metrics
recall = 98.70%, precision = 94.87%, accuracy = 98.89%
Test metrics
recall = 84.66%, precision = 86.14%, accuracy = 94.94%

## 模型加载及结果探查

In [None]:
model_lstm_load = BiLSTM_CRF(config=Config)
model_lstm_load.load_state_dict(torch.load('./models/model_v0_V0-Embrand200-bilstm1Layer200Hidden16Batch1e-3Learnepoch=27accu=0.835210509314095recall=0.9078498293515358F1=0.43500830208429403'))

In [None]:
model_lstm_load

In [None]:
t=next(iter(dataset_conll2003_test_loader))
print(t)
model_lstm_load.cuda()
model_lstm_load.eval()
y = t[2]
res = model_lstm_load(t[0].cuda(), t[1].cuda())
res_arg = torch.argmax(res, dim=2)

### 案例探查总结

一些误判如下：
- 一些容易修正的误判（加规则或者加CRF）
's, y: O, predict: I-ORG

- 地点误判为机构
United, y: B-LOC, predict: I-ORG Arab, y: I-LOC, predict: I-LOC Emirates, y: I-LOC, predict: I-ORG

In [None]:
# true value
y_ner_list = []
for sent in y:
    tmp = []
    for token in sent:
        tmp.append(ner_id2tag[token.item()])
    y_ner_list.append(tmp)

# predict
res_ner_list = []
for sent in res_arg:
    tmp = []
    for token in sent:
        tmp.append(ner_id2tag[token.item()])
    res_ner_list.append(tmp)

for i, sent in enumerate(t[4]):
    print('='*50)
    for j, token in enumerate(sent):
        print('{}, y: {}, predict: {}'.format(token, y_ner_list[i][j], res_ner_list[i][j]))