In [1]:
# basic
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
from tqdm import tqdm
import sklearn

# np/pd
import numpy as np
import pandas as pd

# torch
import torch
import torchtext
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# transformer
from datasets import load_dataset

# CRF
from torchcrf import CRF

In [2]:
from torchtext.vocab import GloVe

global_vectors = GloVe(name='840B', dim=300, cache='../../data/Glove840B300d/')

In [3]:
print(global_vectors.get_vecs_by_tokens(['the','world','will','be','better']).shape)

torch.Size([5, 300])


In [4]:
torch.cuda.is_available()

True

## 探查conll2003数据

定义dataset，dataLoader

In [5]:
# data_udpos = torchtext.datasets.UDPOS(root='./torchtext_datasets_udpos/', split=('train','valid','test'))
dataset_conll2003 = load_dataset("conll2003")

Reusing dataset conll2003 (/root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98)


  0%|          | 0/3 [00:00<?, ?it/s]

In [6]:
dataset_conll2003['test'][0]

{'id': '0',
 'tokens': ['SOCCER',
  '-',
  'JAPAN',
  'GET',
  'LUCKY',
  'WIN',
  ',',
  'CHINA',
  'IN',
  'SURPRISE',
  'DEFEAT',
  '.'],
 'pos_tags': [21, 8, 22, 37, 22, 22, 6, 22, 15, 12, 21, 7],
 'chunk_tags': [11, 0, 11, 21, 11, 12, 0, 11, 13, 11, 12, 0],
 'ner_tags': [0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0, 0]}

In [7]:
dataset_conll2003.class_encode_column

<bound method DatasetDict.class_encode_column of DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})>

In [8]:
START_TAG = "<START>"
STOP_TAG = "<STOP>"

In [9]:
ner_tag2id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 
              'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8, START_TAG:9, STOP_TAG:10}

ner_id2tag = {}
for key in ner_tag2id.keys():
    ner_id2tag[ner_tag2id[key]] = key

In [10]:
ner_id2tag

{0: 'O',
 1: 'B-PER',
 2: 'I-PER',
 3: 'B-ORG',
 4: 'I-ORG',
 5: 'B-LOC',
 6: 'I-LOC',
 7: 'B-MISC',
 8: 'I-MISC',
 9: '<START>',
 10: '<STOP>'}

In [11]:
def ner_id_to_tags(ner_id_seq):
    res1, res2 = [], []
    for ner_id in ner_id_seq:
        res1.append(ner_id2tag.get(ner_id, ''))
        res2.append(ner_id2tag.get(ner_id, '-').split('-')[-1])
    return res1, res2

ner_id_to_tags(dataset_conll2003['train'][0]['ner_tags'])

(['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O'],
 ['ORG', 'O', 'MISC', 'O', 'O', 'O', 'MISC', 'O', 'O'])

In [12]:
m = 0
for k in dataset_conll2003.keys():
    for i in dataset_conll2003[k]['tokens']:
        m = max(m, len(i))
print('max length = {}'.format(m))

max length = 124


In [13]:
word_to_ix = {}
for k in dataset_conll2003.keys():
    for tokens in dataset_conll2003[k]['tokens']:
        for token in tokens:
            if token not in word_to_ix:
                word_to_ix[token] = len(word_to_ix)

for token in [START_TAG, STOP_TAG]:
    if token not in word_to_ix:
        word_to_ix[token] = len(word_to_ix)
        
print(len(word_to_ix))

30291


In [14]:
list(word_to_ix.keys())[-3:]

['well-fancied', '<START>', '<STOP>']

In [15]:
t = dataset_conll2003['train'][0]['ner_tags']
t.insert(0, ner_tag2id[START_TAG])
t.append(ner_tag2id[STOP_TAG])
print(t)

[9, 3, 0, 7, 0, 0, 0, 7, 0, 0, 10]


In [16]:
def func_word2ix(word_to_ix, token_list):
    res = list()
    for token in token_list:
        res.append(word_to_ix.get(token, len(word_to_ix)+1))
    return {'token_ids':res}

def fill_start_end_tag(ner_tags_tmp):
    ner_tags_curr = ner_tags_tmp.copy()
    ner_tags_curr.insert(0,  ner_tag2id[START_TAG])
    ner_tags_curr.append(ner_tag2id[STOP_TAG])
    return {'ner_tags_fill': ner_tags_curr}


def func_get_new_cols(word_to_ix, token_list, ner_tags_tmp):
    # get token ids
    res = list()
    res.append(word_to_ix.get(START_TAG))
    for token in token_list:
        res.append(word_to_ix.get(token, len(word_to_ix)+1))
    res.append(word_to_ix.get(STOP_TAG))
    
    # fill the tokens 
    token_list.insert(0, START_TAG)
    token_list.append(STOP_TAG)
    
    # fill the ner tags
    ner_tags_curr = ner_tags_tmp.copy()
    ner_tags_curr.insert(0,  ner_tag2id[START_TAG])
    ner_tags_curr.append(ner_tag2id[STOP_TAG])
    return {'token_ids':res, 'ner_tags_fill': ner_tags_curr, }

In [17]:
word_to_ix.get(START_TAG)

30289

In [18]:
dataset_conll2003_train=dataset_conll2003['train'].map(lambda x: func_get_new_cols(word_to_ix, x['tokens'], x['ner_tags']))
dataset_conll2003_train.set_format(type="torch", columns=['token_ids','ner_tags_fill'])

dataset_conll2003_test=dataset_conll2003['test'].map(lambda x: func_get_new_cols(word_to_ix, x['tokens'], x['ner_tags']))
dataset_conll2003_test.set_format(type="torch", columns=['token_ids','ner_tags_fill'])

dataset_conll2003_val=dataset_conll2003['validation'].map(lambda x: func_get_new_cols(word_to_ix, x['tokens'], x['ner_tags']))
dataset_conll2003_val.set_format(type="torch", columns=['token_ids','ner_tags_fill'])

Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-98d7a3e3f12bd0c0.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-ac8336dbe7d3dd9f.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-7798d4403bff3e48.arrow


In [19]:
dataset_conll2003_test['ner_tags_fill'][1]

tensor([ 9,  1,  2, 10])

In [20]:
dataset_conll2003_train['tokens'][0], dataset_conll2003_train['token_ids'][0],  dataset_conll2003_train['ner_tags_fill'][0]

(['<START>',
  'EU',
  'rejects',
  'German',
  'call',
  'to',
  'boycott',
  'British',
  'lamb',
  '.',
  '<STOP>'],
 tensor([30289,     0,     1,     2,     3,     4,     5,     6,     7,     8,
         30290]),
 tensor([ 9,  3,  0,  7,  0,  0,  0,  7,  0,  0, 10]))

In [21]:
dataset_conll2003_train

Dataset({
    features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'token_ids', 'ner_tags_fill'],
    num_rows: 14041
})

In [22]:
class MyDataset(Dataset):

    def __init__(self, data):
        self.data = data
        self.token_ids = self.data['token_ids'] # 在这变成torch.tensor，但长度不同
        self.ner_tags_fill = self.data['ner_tags_fill']
        self.tokens = self.data['tokens']
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
#         curr = dict()
#         curr['token_ids'] = self.token_ids
#         curr['ner_tags'] = self.ner_tags
        return self.token_ids[index], self.ner_tags_fill[index], self.tokens[index]


def collate_fn_padd(batch):
    '''
    Padds batch of variable length

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    x, y, z = zip(*batch)
    x_lens = [len(x_i) for x_i in x]
    y_lens = [len(y_i) for y_i in y]
    x_pad = torch.nn.utils.rnn.pad_sequence(x, batch_first=True)
    y_pad = torch.nn.utils.rnn.pad_sequence(y, batch_first=True)
    return x_pad, torch.tensor(x_lens), y_pad, torch.tensor(y_lens), z
    
dataset_conll2003_train_loader = DataLoader(
    MyDataset(dataset_conll2003_train),
    batch_size=32,
    shuffle=True, 
    collate_fn=lambda x: collate_fn_padd(x))


dataset_conll2003_test_loader = DataLoader(
    MyDataset(dataset_conll2003_test),
    batch_size=32,
    shuffle=True, 
    collate_fn=lambda x: collate_fn_padd(x))

In [96]:
dataset_conll2003_val_loader = DataLoader(
    MyDataset(dataset_conll2003_val),
    batch_size=32,
    shuffle=True, 
    collate_fn=lambda x: collate_fn_padd(x))

In [23]:
t = next(iter(dataset_conll2003_train_loader))
pos = 0
print(t[0][pos], sum(t[0][pos]>0), t[1][pos], t[2][pos], t[3][pos], t[4][pos], len(t[4][pos]))

tensor([30289,  4936,    70,  4935,  2058, 30290,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0]) tensor(6) tensor(6) tensor([ 9,  5,  0,  5,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0]) tensor(6) ['<START>', 'PARAMARIBO', ',', 'Surinam', '1996-08-21', '<STOP>'] 6


In [24]:
print(dataset_conll2003_train_loader.dataset.__len__(), dataset_conll2003_test_loader.dataset.__len__())

14041 3453


## token-emb

In [25]:
print(global_vectors.get_vecs_by_tokens(START_TAG)[0:10],global_vectors.get_vecs_by_tokens(STOP_TAG)[0:10] )

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


In [55]:
glove_weight = global_vectors.get_vecs_by_tokens(list(word_to_ix.keys()))
glove_weight[-2], glove_weight[-1] = torch.randn(300), torch.randn(300)
embedding_glove = nn.Embedding.from_pretrained(glove_weight)

In [59]:
embedding_glove(torch.tensor(glove_weight.shape[0]-1))

tensor([ 9.4590e-01,  6.4280e-01,  1.2850e+00, -1.9312e+00, -7.7402e-01,
         1.1995e+00, -1.1461e+00, -7.8981e-01,  2.0956e-02,  2.0498e+00,
        -1.9247e-01,  1.1378e+00,  3.1604e-01,  7.2349e-01,  1.0631e+00,
         4.6080e-01, -2.2031e+00, -1.6780e-01, -5.1149e-01,  7.7356e-01,
         2.8797e-01, -2.8857e-01, -1.8801e+00, -1.0674e+00,  2.7922e-01,
        -4.4782e-01,  2.5344e-01,  3.0045e-02,  3.5837e-01,  2.1979e-01,
        -1.1757e+00, -9.0621e-02, -8.6704e-01,  3.3297e-02,  6.8099e-01,
         1.1265e-01, -1.2839e+00, -4.6444e-01,  1.3207e+00,  6.8688e-01,
        -8.0311e-01, -5.2655e-01, -9.6999e-01,  7.4705e-01,  8.9972e-01,
         7.6337e-02, -1.3263e+00, -5.2868e-01, -1.2421e+00,  2.5707e+00,
        -4.9449e-01, -7.2044e-02,  1.3960e+00, -5.3946e-01, -3.2282e-01,
         7.2201e-01,  1.5926e+00, -3.4144e-01, -1.3687e+00,  4.3110e-01,
         1.6970e+00, -3.4245e-01,  8.4286e-01, -1.1194e+00, -9.9454e-01,
        -1.2910e+00, -2.3693e+00, -9.6196e-01,  1.8

In [60]:
(1-glove_weight[0]==embedding_glove(torch.LongTensor([0]))).sum()

tensor(0)

## 模型定义

In [72]:
class GLove_BiLSTM_CRF(nn.Module):

    def __init__(self, config=None, embedding_weight = None):
        super(GLove_BiLSTM_CRF, self).__init__()
        self.config = config

        # BiLSTM-model 给 emission 层定义参数
        self.embedding_dim = self.config.get('embedding_dim', 300)
        self.hidden_dim = self.config.get('hidden_dim', 200)
        self.vocab_size = self.config.get('vocab_size', 30289)

        if embedding_weight is not None:
            self.word_embeds = nn.Embedding.from_pretrained(embedding_weight)
        else:
            self.word_embeds = nn.Embedding(self.vocab_size, self.embedding_dim)            
            
        self.target_size = self.config.get('num_tags', 11)
        self.num_layers = self.config.get('num_layers',1)
        self.batch_size = self.config.get('batch_size',16)
        self.bidirectional = True

        # lstm
        self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_dim//2,
                            num_layers=self.num_layers, bidirectional=self.bidirectional)
        self.hidden2tag1 = nn.Linear(self.hidden_dim, self.target_size*3)
        self.hidden2tag2 = nn.Linear(self.target_size*3, self.target_size)
        self.dropout010 = nn.Dropout(0.1)
        self.dropout020 = nn.Dropout(0.2)
#         self.hidden_init = self.init_hidden()

        # CRF-model
        self.crf = CRF(self.config.get('num_tags', 9), batch_first=True)
        self.crf.transitions.data[ner_tag2id[START_TAG], :] = -10000
        self.crf.transitions.data[:, ner_tag2id[STOP_TAG]] = -10000

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers*2 if self.bidirectional else self.num_layers,
                             self.batch_size, self.hidden_dim//2)
        cell = torch.zeros(self.num_layers*2 if self.bidirectional else self.num_layers,
                             self.batch_size, self.hidden_dim//2)
        return hidden, cell
    
    def cal_mask(self, sent_len):
        sent_len = sent_len.cuda()
        batch_size = torch.tensor(len(sent_len))
        batch_size = batch_size.cuda()
        batch_seq_len_max = max(sent_len)
        mask = torch.zeros((batch_size.item(), batch_seq_len_max))
        mask = mask.cuda()
        for mask_i in range(mask.shape[0]):
            mask[mask_i][0:sent_len[mask_i]] = 1
#         assert sum(mask.sum(axis=1) == sent_len) // batch_size == 1
        mask = mask > 0
        return mask
    
    def _get_features(self, sent, sent_len):
#         mask = self.cal_mask(sent_len)
            
        embeds = self.word_embeds(sent)
        embed_packed = pack_padded_sequence(embeds, lengths=sent_len.to('cpu'),
                                            batch_first=True,
                                            enforce_sorted=False)
        lstm_out, (hidden, cell) = self.lstm(embed_packed) #, self.hidden_init)
        lstm_out, lens = pad_packed_sequence(lstm_out, batch_first=True)
        tag_score = self.hidden2tag1(lstm_out)
        tag_score = self.dropout010(tag_score)
        tag_score = self.hidden2tag2(tag_score)
        return tag_score
    
    def neg_log_likelihood(self, sent, label, sent_len):
        assert sent.shape[0:2]==label.shape
        mask = self.cal_mask(sent_len)
        mask = mask.cuda()
        feats = self._get_features(sent, sent_len)
        return self.crf(feats, label, mask)

    def forward(self, sent, sent_len):
        """

        :param sent: 输入的已转换为token_id的句子，(batch_len * sent_len * token_emb_len)
        :param sent_len: tensor(list(int))
        :return:
        """
        feats = self._get_features(sent, sent_len)
        return torch.tensor(self.crf.decode(feats))
#         batch_size = len(sent_len)
#         batch_seq_len_max = max(sent_len)
#         mask = torch.zeros((batch_size, batch_seq_len_max))
#         for mask_i in range(mask.shape[0]):
#             mask[mask_i][0:sent_len[mask_i]] = 1
#         assert sum(mask.sum(axis=1) == sent_len) // batch_size == 1
#         mask = self.cal_mask(sent_len)
            
#         embeds = self.word_embeds(sent)
#         embed_packed = pack_padded_sequence(embeds, lengths=sent_len.to('cpu'),
#                                             batch_first=True,
#                                             enforce_sorted=False)
#         lstm_out, (hidden, cell) = self.lstm(embed_packed) #, self.hidden_init)
#         lstm_out, lens = pad_packed_sequence(lstm_out, batch_first=True)
#         tag_score = self.hidden2tag1(lstm_out)
#         tag_score = self.dropout010(tag_score)
#         tag_score = self.hidden2tag2(tag_score)
#         tag_score = self.crf(tag_score)
# #         tag_score = nn.functional.softmax(tag_score, dim=-1)
#         return tag_score

In [62]:
Config = {
    'num_tags':len(ner_tag2id),
    'num_layers':1,
    'embedding_dim':300,
    'vocab_size':glove_weight.shape[0],
    'hidden_dim':100,
    'batch_size':32,
    'START_TAG':"<START>",
    'STOP_TAG': "<STOP>"
}

Config

{'num_tags': 11,
 'num_layers': 1,
 'embedding_dim': 300,
 'vocab_size': 30291,
 'hidden_dim': 100,
 'batch_size': 32,
 'START_TAG': '<START>',
 'STOP_TAG': '<STOP>'}

In [63]:
model_name = 'V3-Emb=Glove300-bilstmCRFLayer=1Hidden=200Dropout0.1Batch=32Learn=1e-1'
model_lstm_crf = GLove_BiLSTM_CRF(config=Config, embedding_weight=glove_weight)

model_lstm_crf = model_lstm_crf.cuda()

model_lstm_crf.parameters

<bound method Module.parameters of GLove_BiLSTM_CRF(
  (word_embeds): Embedding(30291, 300)
  (lstm): LSTM(300, 50, bidirectional=True)
  (hidden2tag1): Linear(in_features=100, out_features=33, bias=True)
  (hidden2tag2): Linear(in_features=33, out_features=11, bias=True)
  (dropout010): Dropout(p=0.1, inplace=False)
  (dropout020): Dropout(p=0.2, inplace=False)
  (crf): CRF(num_tags=11)
)>

In [64]:
# 手动计算验证权重
import collections
ner_tags_all = torch.cat(dataset_conll2003_train['ner_tags_fill'])
t=collections.Counter(ner_tags_all.numpy())
res = []
for k in t:
    res.append((k, len(ner_tags_all)/9/t[k]))
print(res)

class_weights=sklearn.utils.class_weight.compute_class_weight(
    class_weight='balanced',classes=np.unique(ner_tags_all),y=ner_tags_all.numpy())
class_weights=torch.tensor(class_weights,dtype=torch.float)
class_weights

[(9, 1.8335430366624725), (3, 4.072896341999332), (0, 0.15181673199222645), (7, 7.488300691616573), (10, 1.8335430366624725), (1, 3.900723905723906), (2, 5.685684138201806), (5, 3.605711173358232), (4, 6.950533957283417), (8, 22.28985088985089), (6, 22.251320464803612)]


tensor([ 0.1242,  3.1915,  4.6519,  3.3324,  5.6868,  2.9501, 18.2056,  6.1268,
        18.2372,  1.5002,  1.5002])

In [65]:
optimizer = torch.optim.Adam(model_lstm_crf.parameters(), lr=1e-1)
loss_fn = nn.CrossEntropyLoss(reduction='mean', weight=class_weights.cuda()) 
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

## 测试model

In [66]:
# # 测试model
# tmp = next(iter(dataset_conll2003_train_loader))
# # print(tmp)
# sent = tmp[0]
# print(sent.shape)
# sent_len = tmp[1]
# y = tmp[2]

# print(sent.shape, y.shape)

# sent = sent.cuda()
# sent_len = sent_len.cuda()

# model_lstm_crf = model_lstm_crf.cuda()

# with torch.no_grad():
#     res1 = model_lstm_crf(sent.cuda(), sent_len.cuda())
#     loss = model_lstm_crf.neg_log_likelihood(sent=sent, label=y, sent_len=sent_len)

## 定义训练函数

In [67]:
def train_eval_single_epoch(model, data_iter=None, optimizer=None, loss_fn=None, is_train=False):
    if is_train:
        model.train()
    else:
        model.eval()   
    correct_curr, correct_sum, loss_sum, loss_curr = 0, 0, 0, 0
    loss_list, accuracy_list = [], []
    print('Total (training) batch = {}'.format(len(data_iter)))
    batch_i = 0
    data_iter_len = data_iter.dataset.__len__() # total sample num.
    batch_num = len(data_iter)
    
    batch_loss_list = list()
    logits_list = list()
    y_list, y_len_list = [], []
    for batch_data in data_iter:
        torch.cuda.empty_cache()
        batch_i += 1
        if is_train:
            optimizer.zero_grad()
            model.zero_grad()
        x, x_len, y, y_len, _ = batch_data
        x = x.cuda()
        x_len = x_len.cuda()
        y = y.cuda()
        # model predict 
        logits = model(x, x_len)
        assert logits.shape[0:2]==x.shape[0:2]
        # compute loss 
#         batch_loss = 0
        batch_loss = - model.neg_log_likelihood(sent=x, label=y, sent_len=x_len)
#         for i in range(logits.size(0)): # num of samples in one batch
#             loss = loss_fn(logits[i], y[i])
#             batch_loss += loss 
        
#         batch_loss /= logits.size(0)
        if is_train:
            batch_loss.backward()
            optimizer.step()
        
        # 记录
        batch_loss_list.append(batch_loss.item())
        logits_list.append(logits)
        y_list.append(y)
        y_len_list.append(y_len)
#         if batch_i%100==0:
#             print(batch_loss.item())
        
        x = x.cpu()
        x_len = x_len.cpu()
        y = y.cpu()
        
#     print(sum(batch_loss_list)/batch_num)
    return batch_loss_list, logits_list, y_list, y_len_list

In [68]:
def func_eval(model, data_iter=None, loss_fn=None):
    model = model.cuda()
    model.eval()
    correct_curr, correct_sum, loss_sum, loss_curr = 0, 0, 0, 0
    loss_list, accuracy_list = [], []
    print('Total (training) batch = {}'.format(len(data_iter)))
    batch_i = 0
    data_iter_len = data_iter.dataset.__len__() # total sample num.
    batch_num = len(data_iter)
    
    batch_loss_list = list()
    logits_list = list()
    y_list, y_len_list = [], []
    for batch_data in data_iter:
        batch_i += 1
        x, x_len, y, y_len,_ = batch_data
        x = x.cuda()
        x_len = x_len.cuda()
        y = y.cuda()
        # model predict 
        logits = model(x, x_len)
        assert logits.shape[0:2]==x.shape[0:2]
        # compute loss 
        batch_loss = - model.neg_log_likelihood(sent=x, label=y, sent_len=x_len)
#         batch_loss = 0
#         for i in range(logits.size(0)): # num of samples in one batch
#             loss = loss_fn(logits[i], y[i])
#             batch_loss -= loss 
        
#         batch_loss /= logits.size(0)

        # 记录
        batch_loss_list.append(batch_loss.item())
        logits_list.append(logits)
        y_list.append(y)
        y_len_list.append(y_len)
#         if batch_i%100==0:
#             print(batch_loss.item())
        
        x = x.cpu()
        x_len = x_len.cpu()
        y = y.cpu()
        
#     print(sum(batch_loss_list)/batch_num)
    return batch_loss_list, logits_list, y_list, y_len_list

## 训练开始

In [69]:
from MyFunctions3 import MyTools

In [70]:
f1_test_curr, f1_test_max = 0, 0

for epoch in tqdm(range(50)):
    print('='*50)
    print('Epoch = {}'.format(epoch))
    print('='*50)
    batch_loss_list, logits_list, y_list, y_len_list = train_eval_single_epoch(model_lstm_crf, 
                                                                               dataset_conll2003_train_loader, 
                                                                               optimizer=optimizer,
                                                                               loss_fn=loss_fn,
                                                                               is_train=True)
    scheduler.step() # 加上后好一些
    print('training loss = {:.8f}'.format(sum(batch_loss_list)/len(batch_loss_list)))
    test_batch_loss_list, test_logits_list, test_y_list, test_y_len_list = func_eval(model_lstm_crf, 
                                                                               dataset_conll2003_test_loader, 
                                                                               loss_fn=loss_fn)
    print('testing loss = {:.8f}'.format(sum(test_batch_loss_list)/len(test_batch_loss_list)))
    
#     评估
    train_dict = MyTools.func_cal_accu_recall(logits_list=logits_list, y_list=y_list, y_len_list=y_len_list, 
                                              is_logits_tag=True)
    test_dict = MyTools.func_cal_accu_recall(logits_list=test_logits_list, y_list=test_y_list, 
                                             y_len_list=test_y_len_list, is_logits_tag=True)
    
    print('Train metrics')
    train_metric = MyTools.func_cal_metrics(train_dict)
    print('Test metrics')
    test_metric = MyTools.func_cal_metrics(test_dict)
    
    # save model if current f1 rate is better than previous ones
    recall_test_curr, precision_test_curr, accuracy_test_curr, f1_test_curr = test_metric
    if f1_test_curr > f1_test_max:
        torch.save(model_lstm_crf.state_dict(), './models/model_v3_'+model_name+'epoch='+str(epoch)+
                   'F1='+str(round(f1_test_curr,4))+
                   'accu='+str(round(accuracy_test_curr,4))+
                   'recall='+str(round(recall_test_curr,4))+
                   'precision='+str(round(precision_test_curr,4)))
    f1_test_max = max(f1_test_curr, f1_test_max)

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch = 0
Total (training) batch = 439
training loss = 37084.86910149
Total (training) batch = 108
testing loss = 188.79130554


  assert sum(mask.sum(axis=1) == y_len_list[logit_i]) // batch_len == 1
  2%|▏         | 1/50 [01:03<51:31, 63.10s/it]

Train metrics
recall = 51.67%, precision = 75.46%, accuracy = 89.11%, f1 = 61.34%
Test metrics
recall = 63.24%, precision = 87.93%, accuracy = 92.06%, f1 = 73.57%
Epoch = 1
Total (training) batch = 439
training loss = 242.62431958
Total (training) batch = 108
testing loss = 172.03254982


  4%|▍         | 2/50 [02:05<50:00, 62.51s/it]

Train metrics
recall = 64.89%, precision = 86.60%, accuracy = 92.45%, f1 = 74.19%
Test metrics
recall = 69.51%, precision = 85.76%, accuracy = 92.66%, f1 = 76.79%
Epoch = 2
Total (training) batch = 439
training loss = 139.09085326
Total (training) batch = 108
testing loss = 148.62897265


  6%|▌         | 3/50 [03:07<48:47, 62.28s/it]

Train metrics
recall = 69.44%, precision = 89.01%, accuracy = 93.46%, f1 = 78.01%
Test metrics
recall = 71.34%, precision = 87.62%, accuracy = 93.23%, f1 = 78.64%
Epoch = 3
Total (training) batch = 439
training loss = 108.40065265
Total (training) batch = 108
testing loss = 99.52369344


  8%|▊         | 4/50 [04:08<47:31, 61.99s/it]

Train metrics
recall = 74.61%, precision = 91.47%, accuracy = 94.59%, f1 = 82.18%
Test metrics
recall = 76.61%, precision = 84.21%, accuracy = 93.41%, f1 = 80.23%
Epoch = 4
Total (training) batch = 439
training loss = 97.85171910
Total (training) batch = 108
testing loss = 149.86701852


 10%|█         | 5/50 [05:11<46:34, 62.10s/it]

Train metrics
recall = 76.45%, precision = 92.56%, accuracy = 95.04%, f1 = 83.74%
Test metrics
recall = 69.19%, precision = 93.88%, accuracy = 93.83%, f1 = 79.67%
Epoch = 5
Total (training) batch = 439
training loss = 82.01887567
Total (training) batch = 108
testing loss = 109.33852860


 12%|█▏        | 6/50 [06:13<45:31, 62.09s/it]

Train metrics
recall = 78.79%, precision = 93.42%, accuracy = 95.53%, f1 = 85.48%
Test metrics
recall = 76.08%, precision = 89.98%, accuracy = 94.34%, f1 = 82.45%
Epoch = 6
Total (training) batch = 439
training loss = 74.18475400
Total (training) batch = 108
testing loss = 109.29346212


 14%|█▍        | 7/50 [07:15<44:39, 62.32s/it]

Train metrics
recall = 80.01%, precision = 93.99%, accuracy = 95.80%, f1 = 86.44%
Test metrics
recall = 77.66%, precision = 90.52%, accuracy = 94.68%, f1 = 83.60%
Epoch = 7
Total (training) batch = 439
training loss = 67.21776863
Total (training) batch = 108
testing loss = 130.36788785


 16%|█▌        | 8/50 [08:20<44:07, 63.03s/it]

Train metrics
recall = 81.57%, precision = 94.45%, accuracy = 96.12%, f1 = 87.54%
Test metrics
recall = 73.84%, precision = 90.95%, accuracy = 94.15%, f1 = 81.51%
Epoch = 8
Total (training) batch = 439
training loss = 75.94999733
Total (training) batch = 108
testing loss = 79.29000586


 18%|█▊        | 9/50 [09:22<42:51, 62.73s/it]

Train metrics
recall = 81.90%, precision = 94.26%, accuracy = 96.14%, f1 = 87.65%
Test metrics
recall = 82.04%, precision = 92.75%, accuracy = 95.74%, f1 = 87.07%
Epoch = 9
Total (training) batch = 439
training loss = 64.29180152
Total (training) batch = 108
testing loss = 111.27881481


 20%|██        | 10/50 [10:24<41:39, 62.49s/it]

Train metrics
recall = 83.54%, precision = 95.06%, accuracy = 96.52%, f1 = 88.93%
Test metrics
recall = 83.75%, precision = 91.40%, accuracy = 95.79%, f1 = 87.41%
Epoch = 10
Total (training) batch = 439
training loss = 52.15684741
Total (training) batch = 108
testing loss = 83.05177830


 22%|██▏       | 11/50 [11:27<40:44, 62.68s/it]

Train metrics
recall = 84.93%, precision = 95.44%, accuracy = 96.80%, f1 = 89.88%
Test metrics
recall = 81.57%, precision = 92.21%, accuracy = 95.58%, f1 = 86.56%
Epoch = 11
Total (training) batch = 439
training loss = 52.36460851
Total (training) batch = 108
testing loss = 107.19123134


 24%|██▍       | 12/50 [12:29<39:36, 62.54s/it]

Train metrics
recall = 85.32%, precision = 95.57%, accuracy = 96.88%, f1 = 90.15%
Test metrics
recall = 79.73%, precision = 92.88%, accuracy = 95.39%, f1 = 85.81%
Epoch = 12
Total (training) batch = 439
training loss = 46.55162236
Total (training) batch = 108
testing loss = 97.47117897


 26%|██▌       | 13/50 [13:34<38:52, 63.05s/it]

Train metrics
recall = 86.09%, precision = 96.00%, accuracy = 97.07%, f1 = 90.78%
Test metrics
recall = 81.76%, precision = 88.94%, accuracy = 95.04%, f1 = 85.19%
Epoch = 13
Total (training) batch = 439
training loss = 44.85513081
Total (training) batch = 108
testing loss = 82.78378963


 28%|██▊       | 14/50 [14:36<37:41, 62.81s/it]

Train metrics
recall = 86.94%, precision = 96.13%, accuracy = 97.23%, f1 = 91.31%
Test metrics
recall = 83.39%, precision = 92.73%, accuracy = 95.96%, f1 = 87.82%
Epoch = 14
Total (training) batch = 439
training loss = 49.92351771
Total (training) batch = 108
testing loss = 84.62487736


 30%|███       | 15/50 [15:37<36:25, 62.45s/it]

Train metrics
recall = 87.28%, precision = 96.21%, accuracy = 97.30%, f1 = 91.52%
Test metrics
recall = 81.98%, precision = 92.76%, accuracy = 95.73%, f1 = 87.04%
Epoch = 15
Total (training) batch = 439
training loss = 45.50068344
Total (training) batch = 108
testing loss = 116.48005521


 32%|███▏      | 16/50 [16:39<35:18, 62.32s/it]

Train metrics
recall = 87.37%, precision = 96.22%, accuracy = 97.31%, f1 = 91.58%
Test metrics
recall = 83.52%, precision = 92.96%, accuracy = 96.02%, f1 = 87.99%
Epoch = 16
Total (training) batch = 439
training loss = 39.47901686
Total (training) batch = 108
testing loss = 75.01201271


 34%|███▍      | 17/50 [17:44<34:35, 62.90s/it]

Train metrics
recall = 89.19%, precision = 96.92%, accuracy = 97.72%, f1 = 92.89%
Test metrics
recall = 82.78%, precision = 92.62%, accuracy = 95.84%, f1 = 87.42%
Epoch = 17
Total (training) batch = 439
training loss = 36.74634117
Total (training) batch = 108
testing loss = 124.80183241


 36%|███▌      | 18/50 [18:47<33:33, 62.92s/it]

Train metrics
recall = 89.75%, precision = 97.08%, accuracy = 97.83%, f1 = 93.27%
Test metrics
recall = 83.23%, precision = 88.15%, accuracy = 95.12%, f1 = 85.62%
Epoch = 18
Total (training) batch = 439
training loss = 54.13782196
Total (training) batch = 108
testing loss = 86.39618909


 38%|███▊      | 19/50 [19:49<32:27, 62.83s/it]

Train metrics
recall = 89.31%, precision = 96.57%, accuracy = 97.68%, f1 = 92.80%
Test metrics
recall = 81.67%, precision = 95.08%, accuracy = 96.06%, f1 = 87.86%
Epoch = 19
Total (training) batch = 439
training loss = 32.45031571
Total (training) batch = 108
testing loss = 82.14926430


 40%|████      | 20/50 [20:52<31:21, 62.72s/it]

Train metrics
recall = 90.55%, precision = 97.21%, accuracy = 97.99%, f1 = 93.76%
Test metrics
recall = 81.56%, precision = 92.99%, accuracy = 95.70%, f1 = 86.90%
Epoch = 20
Total (training) batch = 439
training loss = 34.24285932
Total (training) batch = 108
testing loss = 88.36972781


 42%|████▏     | 21/50 [21:53<30:08, 62.38s/it]

Train metrics
recall = 90.50%, precision = 97.40%, accuracy = 98.01%, f1 = 93.82%
Test metrics
recall = 82.24%, precision = 93.81%, accuracy = 95.95%, f1 = 87.64%
Epoch = 21
Total (training) batch = 439
training loss = 29.27049147
Total (training) batch = 108
testing loss = 84.29703126


 44%|████▍     | 22/50 [22:55<28:57, 62.04s/it]

Train metrics
recall = 90.95%, precision = 97.36%, accuracy = 98.07%, f1 = 94.04%
Test metrics
recall = 81.39%, precision = 93.74%, accuracy = 95.80%, f1 = 87.13%
Epoch = 22
Total (training) batch = 439
training loss = 33.24487381
Total (training) batch = 108
testing loss = 81.37540754


 46%|████▌     | 23/50 [23:57<27:57, 62.11s/it]

Train metrics
recall = 90.39%, precision = 97.30%, accuracy = 97.97%, f1 = 93.72%
Test metrics
recall = 83.75%, precision = 93.84%, accuracy = 96.20%, f1 = 88.51%
Epoch = 23
Total (training) batch = 439
training loss = 30.55850711
Total (training) batch = 108
testing loss = 84.23894382


 48%|████▊     | 24/50 [25:00<27:02, 62.42s/it]

Train metrics
recall = 90.93%, precision = 97.43%, accuracy = 98.08%, f1 = 94.07%
Test metrics
recall = 83.46%, precision = 94.05%, accuracy = 96.19%, f1 = 88.44%
Epoch = 24
Total (training) batch = 439
training loss = 36.25971231
Total (training) batch = 108
testing loss = 92.52253070


 50%|█████     | 25/50 [26:05<26:21, 63.24s/it]

Train metrics
recall = 90.73%, precision = 97.33%, accuracy = 98.03%, f1 = 93.91%
Test metrics
recall = 82.57%, precision = 94.38%, accuracy = 96.10%, f1 = 88.08%
Epoch = 25
Total (training) batch = 439
training loss = 35.13052602
Total (training) batch = 108
testing loss = 92.84078623


 52%|█████▏    | 26/50 [27:12<25:41, 64.23s/it]

Train metrics
recall = 90.47%, precision = 97.11%, accuracy = 97.96%, f1 = 93.67%
Test metrics
recall = 83.16%, precision = 93.18%, accuracy = 95.99%, f1 = 87.88%
Epoch = 26
Total (training) batch = 439
training loss = 38.26826751
Total (training) batch = 108
testing loss = 96.23392437


 54%|█████▍    | 27/50 [28:14<24:21, 63.55s/it]

Train metrics
recall = 90.48%, precision = 97.11%, accuracy = 97.96%, f1 = 93.67%
Test metrics
recall = 84.74%, precision = 93.01%, accuracy = 96.22%, f1 = 88.68%
Epoch = 27
Total (training) batch = 439
training loss = 35.66216505
Total (training) batch = 108
testing loss = 111.90232782


 56%|█████▌    | 28/50 [29:15<23:04, 62.92s/it]

Train metrics
recall = 90.91%, precision = 97.32%, accuracy = 98.06%, f1 = 94.01%
Test metrics
recall = 82.63%, precision = 90.99%, accuracy = 95.54%, f1 = 86.61%
Epoch = 28
Total (training) batch = 439
training loss = 40.51158003
Total (training) batch = 108
testing loss = 101.69696670


 58%|█████▊    | 29/50 [30:24<22:41, 64.82s/it]

Train metrics
recall = 90.02%, precision = 97.17%, accuracy = 97.89%, f1 = 93.46%
Test metrics
recall = 84.63%, precision = 92.76%, accuracy = 96.16%, f1 = 88.51%
Epoch = 29
Total (training) batch = 439
training loss = 51.86252762
Total (training) batch = 108
testing loss = 106.79787685


 60%|██████    | 30/50 [31:29<21:34, 64.75s/it]

Train metrics
recall = 89.17%, precision = 96.83%, accuracy = 97.70%, f1 = 92.84%
Test metrics
recall = 83.20%, precision = 93.88%, accuracy = 96.12%, f1 = 88.22%
Epoch = 30
Total (training) batch = 439
training loss = 53.15311672
Total (training) batch = 108
testing loss = 214.90400413


 62%|██████▏   | 31/50 [32:30<20:11, 63.76s/it]

Train metrics
recall = 88.70%, precision = 96.62%, accuracy = 97.59%, f1 = 92.49%
Test metrics
recall = 74.32%, precision = 88.52%, accuracy = 93.83%, f1 = 80.80%
Epoch = 31
Total (training) batch = 439
training loss = 74.59288999
Total (training) batch = 108
testing loss = 110.22462859


 64%|██████▍   | 32/50 [33:36<19:15, 64.20s/it]

Train metrics
recall = 86.98%, precision = 95.94%, accuracy = 97.21%, f1 = 91.24%
Test metrics
recall = 85.01%, precision = 92.28%, accuracy = 96.14%, f1 = 88.50%
Epoch = 32
Total (training) batch = 439
training loss = 73.33183315
Total (training) batch = 108
testing loss = 110.90926107


 66%|██████▌   | 33/50 [34:40<18:12, 64.26s/it]

Train metrics
recall = 87.08%, precision = 95.95%, accuracy = 97.22%, f1 = 91.30%
Test metrics
recall = 83.84%, precision = 93.02%, accuracy = 96.08%, f1 = 88.19%
Epoch = 33
Total (training) batch = 439
training loss = 86.17208862
Total (training) batch = 108
testing loss = 124.46328170


 68%|██████▊   | 34/50 [35:42<16:55, 63.45s/it]

Train metrics
recall = 85.59%, precision = 95.76%, accuracy = 96.96%, f1 = 90.39%
Test metrics
recall = 79.88%, precision = 89.53%, accuracy = 94.85%, f1 = 84.43%
Epoch = 34
Total (training) batch = 439
training loss = 113.35994753
Total (training) batch = 108
testing loss = 184.73359285


 70%|███████   | 35/50 [36:43<15:41, 62.77s/it]

Train metrics
recall = 82.33%, precision = 94.21%, accuracy = 96.20%, f1 = 87.87%
Test metrics
recall = 72.21%, precision = 96.36%, accuracy = 94.67%, f1 = 82.56%
Epoch = 35
Total (training) batch = 439
training loss = 130.14356748
Total (training) batch = 108
testing loss = 106.75198025


 72%|███████▏  | 36/50 [37:44<14:31, 62.28s/it]

Train metrics
recall = 81.12%, precision = 94.00%, accuracy = 95.98%, f1 = 87.08%
Test metrics
recall = 83.57%, precision = 90.60%, accuracy = 95.62%, f1 = 86.94%
Epoch = 36
Total (training) batch = 439
training loss = 159.04222131
Total (training) batch = 108
testing loss = 109.75202546


 74%|███████▍  | 37/50 [38:45<13:25, 61.93s/it]

Train metrics
recall = 78.72%, precision = 92.39%, accuracy = 95.36%, f1 = 85.01%
Test metrics
recall = 81.27%, precision = 91.54%, accuracy = 95.42%, f1 = 86.10%
Epoch = 37
Total (training) batch = 439
training loss = 204.22103791
Total (training) batch = 108
testing loss = 145.98527922


 76%|███████▌  | 38/50 [39:48<12:28, 62.37s/it]

Train metrics
recall = 72.66%, precision = 88.59%, accuracy = 93.87%, f1 = 79.84%
Test metrics
recall = 71.35%, precision = 95.57%, accuracy = 94.42%, f1 = 81.71%
Epoch = 38
Total (training) batch = 439
training loss = 748.48452404
Total (training) batch = 108
testing loss = 345.27642144


 78%|███████▊  | 39/50 [40:50<11:21, 62.00s/it]

Train metrics
recall = 59.80%, precision = 78.66%, accuracy = 90.57%, f1 = 67.94%
Test metrics
recall = 54.39%, precision = 80.97%, accuracy = 89.80%, f1 = 65.07%
Epoch = 39
Total (training) batch = 439
training loss = 354.66648945
Total (training) batch = 108
testing loss = 150.93711118


 80%|████████  | 40/50 [41:51<10:18, 61.90s/it]

Train metrics
recall = 66.50%, precision = 83.14%, accuracy = 92.14%, f1 = 73.89%
Test metrics
recall = 72.36%, precision = 87.74%, accuracy = 93.41%, f1 = 79.31%
Epoch = 40
Total (training) batch = 439
training loss = 1100.56265794
Total (training) batch = 108
testing loss = 755.62313730


 82%|████████▏ | 41/50 [42:52<09:14, 61.65s/it]

Train metrics
recall = 47.48%, precision = 63.37%, accuracy = 86.63%, f1 = 54.28%
Test metrics
recall = 44.00%, precision = 82.63%, accuracy = 88.60%, f1 = 57.42%
Epoch = 41
Total (training) batch = 439
training loss = 1258.16356421
Total (training) batch = 108
testing loss = 693.52860966


 84%|████████▍ | 42/50 [43:54<08:12, 61.61s/it]

Train metrics
recall = 35.65%, precision = 47.43%, accuracy = 82.64%, f1 = 40.71%
Test metrics
recall = 31.39%, precision = 78.56%, accuracy = 86.52%, f1 = 44.85%
Epoch = 42
Total (training) batch = 439
training loss = 1689.06511158
Total (training) batch = 108
testing loss = 610.34832538


 86%|████████▌ | 43/50 [44:55<07:09, 61.43s/it]

Train metrics
recall = 24.65%, precision = 33.48%, accuracy = 79.21%, f1 = 28.40%
Test metrics
recall = 35.31%, precision = 58.15%, accuracy = 84.26%, f1 = 43.94%
Epoch = 43
Total (training) batch = 439
training loss = 2313.17279595
Total (training) batch = 108
testing loss = 1974.38868996


 88%|████████▊ | 44/50 [45:56<06:08, 61.34s/it]

Train metrics
recall = 15.55%, precision = 20.94%, accuracy = 76.06%, f1 = 17.85%
Test metrics
recall = 12.15%, precision = 17.49%, accuracy = 74.63%, f1 = 14.34%
Epoch = 44
Total (training) batch = 439
training loss = 2407.22969462
Total (training) batch = 108
testing loss = 1187.68603516


 90%|█████████ | 45/50 [46:57<05:06, 61.35s/it]

Train metrics
recall = 10.82%, precision = 14.54%, accuracy = 74.46%, f1 = 12.40%
Test metrics
recall = 8.58%, precision = 57.10%, accuracy = 82.90%, f1 = 14.92%
Epoch = 45
Total (training) batch = 439
training loss = 3011.38541073
Total (training) batch = 108
testing loss = 330.11986400


 92%|█████████▏| 46/50 [47:59<04:05, 61.39s/it]

Train metrics
recall = 6.39%, precision = 9.06%, accuracy = 73.62%, f1 = 7.50%
Test metrics
recall = 14.34%, precision = 29.35%, accuracy = 79.01%, f1 = 19.26%
Epoch = 46
Total (training) batch = 439
training loss = 3143.61155947
Total (training) batch = 108
testing loss = 844.59805411


 94%|█████████▍| 47/50 [49:00<03:03, 61.25s/it]

Train metrics
recall = 5.44%, precision = 7.41%, accuracy = 72.82%, f1 = 6.27%
Test metrics
recall = 7.96%, precision = 17.43%, accuracy = 77.33%, f1 = 10.93%
Epoch = 47
Total (training) batch = 439
training loss = 3337.45968287
Total (training) batch = 108
testing loss = 289.62915943


 96%|█████████▌| 48/50 [50:03<02:03, 61.77s/it]

Train metrics
recall = 4.90%, precision = 7.95%, accuracy = 74.62%, f1 = 6.06%
Test metrics
recall = 7.27%, precision = 33.91%, accuracy = 81.32%, f1 = 11.98%
Epoch = 48
Total (training) batch = 439
training loss = 2786.42918921
Total (training) batch = 108
testing loss = 292.75405093


 98%|█████████▊| 49/50 [51:04<01:01, 61.65s/it]

Train metrics
recall = 4.51%, precision = 11.12%, accuracy = 78.01%, f1 = 6.42%
Test metrics
recall = 9.95%, precision = 30.38%, accuracy = 80.29%, f1 = 14.99%
Epoch = 49
Total (training) batch = 439
training loss = 2992.71072395
Total (training) batch = 108
testing loss = 268.07975260


100%|██████████| 50/50 [52:06<00:00, 62.54s/it]

Train metrics
recall = 3.98%, precision = 9.50%, accuracy = 77.61%, f1 = 5.61%
Test metrics
recall = 2.55%, precision = 34.73%, accuracy = 82.14%, f1 = 4.75%





## 训练总结

不太清楚为何到后来，训练集开始出现recall和precision的波动；最后下降到10%以下（就很离谱？！）

## 模型加载及结果探查

我们使用第8个epoch模型，
- test： recall = 82.04%, precision = 92.75%, accuracy = 95.74%, f1 = 87.07%
- eval： recall = 83.20%, precision = 96.40%, accuracy = 96.67%, f1 = 89.32%



In [73]:
model_load = GLove_BiLSTM_CRF(config=Config)
model_load.load_state_dict(torch.load('./models/model_v3_V3-Emb=Glove300-bilstmCRFLayer=1Hidden=200Dropout0.1Batch=32Learn=1e-1epoch=8F1=0.8707accu=0.9574recall=0.8204precision=0.9275'))

<All keys matched successfully>

In [74]:
model_load

GLove_BiLSTM_CRF(
  (word_embeds): Embedding(30291, 300)
  (lstm): LSTM(300, 50, bidirectional=True)
  (hidden2tag1): Linear(in_features=100, out_features=33, bias=True)
  (hidden2tag2): Linear(in_features=33, out_features=11, bias=True)
  (dropout010): Dropout(p=0.1, inplace=False)
  (dropout020): Dropout(p=0.2, inplace=False)
  (crf): CRF(num_tags=11)
)

In [94]:
test_batch_loss_list_load, test_logits_list_load, test_y_list_load, test_y_len_list_load = func_eval(model_load, 
                                                                           dataset_conll2003_test_loader, 
                                                                           loss_fn=loss_fn)
print('testing loss = {:.8f}'.format(sum(test_batch_loss_list_load)/len(test_batch_loss_list_load)))

#     评估
test_dict_load = MyTools.func_cal_accu_recall(logits_list=test_logits_list_load, y_list=test_y_list_load, 
                                         y_len_list=test_y_len_list_load, is_logits_tag=True)
test_metric_load = MyTools.func_cal_metrics(test_dict_load)

Total (training) batch = 108
testing loss = 79.28989290
recall = 82.04%, precision = 92.75%, accuracy = 95.74%, f1 = 87.07%


In [97]:
eval_batch_loss_list_load, eval_logits_list_load, eval_y_list_load, eval_y_len_list_load = func_eval(model_load, 
                                                                           dataset_conll2003_val_loader, 
                                                                           loss_fn=loss_fn)
print('evaling loss = {:.8f}'.format(sum(eval_batch_loss_list_load)/len(eval_batch_loss_list_load)))

#     评估
eval_dict_load = MyTools.func_cal_accu_recall(logits_list=eval_logits_list_load, y_list=eval_y_list_load, 
                                         y_len_list=eval_y_len_list_load, is_logits_tag=True)
eval_metric_load = MyTools.func_cal_metrics(eval_dict_load)

Total (training) batch = 102
evaling loss = 66.94167350


  assert sum(mask.sum(axis=1) == y_len_list[logit_i]) // batch_len == 1


recall = 83.20%, precision = 96.40%, accuracy = 96.67%, f1 = 89.32%


In [93]:
test_dict_load

{'tp': 6655.0,
 'tn': 37803.0,
 'fp': 520.0,
 'fn': 500.0,
 'others': 957.0,
 'n_total': 46435.0}

In [90]:
t=next(iter(dataset_conll2003_test_loader))
# print(t)
model_load.cuda()
model_load.eval()
y = t[2]
res_arg = model_load(t[0].cuda(), t[1].cuda())

(tensor([[30289,  1831, 30290,  ...,     0,     0,     0],
        [30289,  1910,   389,  ...,     0,     0,     0],
        [30289,  2342,   318,  ...,     0,     0,     0],
        ...,
        [30289,   269, 11672,  ...,     0,     0,     0],
        [30289,  1428,  3760,  ...,     0,     0,     0],
        [30289,  2260,  5889,  ...,     0,     0,     0]]), tensor([ 3, 11, 10, 25,  5, 52,  9,  9,  4,  4,  9, 28,  7,  9, 33, 10,  9, 16,
        37, 43, 36, 12,  4, 27,  9, 23, 10,  8, 15, 43,  6, 22]), tensor([[ 9,  5, 10,  ...,  0,  0,  0],
        [ 9,  0,  0,  ...,  0,  0,  0],
        [ 9,  3,  0,  ...,  0,  0,  0],
        ...,
        [ 9,  0,  0,  ...,  0,  0,  0],
        [ 9,  3,  0,  ...,  0,  0,  0],
        [ 9,  5,  6,  ...,  0,  0,  0]]), tensor([ 3, 11, 10, 25,  5, 52,  9,  9,  4,  4,  9, 28,  7,  9, 33, 10,  9, 16,
        37, 43, 36, 12,  4, 27,  9, 23, 10,  8, 15, 43,  6, 22]), (['<START>', 'Australia', '<STOP>'], ['<START>', 'Scorers', ':', 'Shkvyrin', 'Igor', '78'

### 案例探查总结

一些误判如下：
- 一些容易修正的误判（加规则或者加CRF）
's, y: O, predict: I-ORG

- 地点误判为机构
United, y: B-LOC, predict: I-ORG Arab, y: I-LOC, predict: I-LOC Emirates, y: I-LOC, predict: I-ORG

In [91]:
# true value
y_ner_list = []
for sent in y:
    tmp = []
    for token in sent:
        tmp.append(ner_id2tag[token.item()])
    y_ner_list.append(tmp)

# predict
res_ner_list = []
for sent in res_arg:
    tmp = []
    for token in sent:
        tmp.append(ner_id2tag[token.item()])
    res_ner_list.append(tmp)

for i, sent in enumerate(t[4]):
    print('='*50)
    for j, token in enumerate(sent):
        print('{}, y: {}, predict: {}'.format(token, y_ner_list[i][j], res_ner_list[i][j]))

<START>, y: <START>, predict: <START>
Australia, y: B-LOC, predict: B-LOC
<STOP>, y: <STOP>, predict: <STOP>
<START>, y: <START>, predict: <START>
Scorers, y: O, predict: O
:, y: O, predict: O
Shkvyrin, y: B-PER, predict: B-PER
Igor, y: I-PER, predict: I-PER
78, y: O, predict: O
,, y: O, predict: O
Shatskikh, y: B-PER, predict: B-PER
Oleg, y: I-PER, predict: I-PER
90, y: O, predict: O
<STOP>, y: <STOP>, predict: <STOP>
<START>, y: <START>, predict: <START>
Everton, y: B-ORG, predict: B-ORG
16, y: O, predict: O
6, y: O, predict: O
6, y: O, predict: O
4, y: O, predict: O
25, y: O, predict: O
20, y: O, predict: O
24, y: O, predict: O
<STOP>, y: <STOP>, predict: <STOP>
<START>, y: <START>, predict: <START>
In, y: O, predict: O
another, y: O, predict: O
attack, y: O, predict: O
,, y: O, predict: O
also, y: O, predict: O
on, y: O, predict: O
the, y: O, predict: O
province, y: O, predict: O
's, y: O, predict: O
south, y: O, predict: O
coast, y: O, predict: O
on, y: O, predict: O
Thursday, y: 