In [1]:
# basic
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
from tqdm import tqdm
import sklearn

# np/pd
import numpy as np
import pandas as pd

# torch
import torch
import torchtext
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# transformer
from datasets import load_dataset

# CRF
from torchcrf import CRF

In [2]:
torch.cuda.is_available()

True

In [3]:
Config = {
    'num_tags':9,
    'num_layers':1,
    'embedding_dim':200,
    'vocab_size':23623,
    'hidden_dim':100,
    'batch_size':32
}

## 探查conll2003数据

定义dataset，dataLoader

In [4]:
# data_udpos = torchtext.datasets.UDPOS(root='./torchtext_datasets_udpos/', split=('train','valid','test'))
dataset_conll2003 = load_dataset("conll2003")

Reusing dataset conll2003 (/root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98)


  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
dataset_conll2003['test'][0]

{'id': '0',
 'tokens': ['SOCCER',
  '-',
  'JAPAN',
  'GET',
  'LUCKY',
  'WIN',
  ',',
  'CHINA',
  'IN',
  'SURPRISE',
  'DEFEAT',
  '.'],
 'pos_tags': [21, 8, 22, 37, 22, 22, 6, 22, 15, 12, 21, 7],
 'chunk_tags': [11, 0, 11, 21, 11, 12, 0, 11, 13, 11, 12, 0],
 'ner_tags': [0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0, 0]}

In [6]:
ner_tag2id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}

ner_id2tag = {}
for key in ner_tag2id.keys():
    ner_id2tag[ner_tag2id[key]] = key

In [7]:
ner_id2tag

{0: 'O',
 1: 'B-PER',
 2: 'I-PER',
 3: 'B-ORG',
 4: 'I-ORG',
 5: 'B-LOC',
 6: 'I-LOC',
 7: 'B-MISC',
 8: 'I-MISC'}

In [8]:
def ner_id_to_tags(ner_id_seq):
    res1, res2 = [], []
    for ner_id in ner_id_seq:
        res1.append(ner_id2tag.get(ner_id, ''))
        res2.append(ner_id2tag.get(ner_id, '-').split('-')[-1])
    return res1, res2

ner_id_to_tags(dataset_conll2003['train'][0]['ner_tags'])

(['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O'],
 ['ORG', 'O', 'MISC', 'O', 'O', 'O', 'MISC', 'O', 'O'])

In [9]:
m = 0
for k in dataset_conll2003.keys():
    for i in dataset_conll2003[k]['tokens']:
        m = max(m, len(i))
print('max length = {}'.format(m))

max length = 124


In [10]:
word_to_ix = {}
for k in dataset_conll2003.keys():
    for tokens in dataset_conll2003[k]['tokens']:
        for token in tokens:
            if token not in word_to_ix:
                word_to_ix[token] = len(word_to_ix)
                
Config['vocab_size'] = len(word_to_ix)

print(Config['vocab_size'])

30289


In [11]:
def func_word2ix(word_to_ix, token_list):
    res = list()
    for token in token_list:
        res.append(word_to_ix.get(token, len(word_to_ix)+1))
    return {'token_ids':res}

In [12]:
dataset_conll2003_train=dataset_conll2003['train'].map(lambda x: func_word2ix(word_to_ix, x['tokens']))
dataset_conll2003_train.set_format(type="torch", columns=['token_ids','ner_tags'])

dataset_conll2003_test=dataset_conll2003['test'].map(lambda x: func_word2ix(word_to_ix, x['tokens']))
dataset_conll2003_test.set_format(type="torch", columns=['token_ids','ner_tags'])

dataset_conll2003_val=dataset_conll2003['validation'].map(lambda x: func_word2ix(word_to_ix, x['tokens']))
dataset_conll2003_val.set_format(type="torch", columns=['token_ids','ner_tags'])

Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-e993f22e7c8f1977.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-9da2d1c0e9528511.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98/cache-586edb6c12fcdba7.arrow


In [13]:
class MyDataset(Dataset):

    def __init__(self, data):
        self.data = data
        self.token_ids = self.data['token_ids'] # 在这变成torch.tensor，但长度不同
        self.ner_tags = self.data['ner_tags']
        self.tokens = self.data['tokens']
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
#         curr = dict()
#         curr['token_ids'] = self.token_ids
#         curr['ner_tags'] = self.ner_tags
        return self.token_ids[index], self.ner_tags[index], self.tokens[index]


def collate_fn_padd(batch):
    '''
    Padds batch of variable length

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    x, y, z = zip(*batch)
    x_lens = [len(x_i) for x_i in x]
    y_lens = [len(y_i) for y_i in y]
    x_pad = torch.nn.utils.rnn.pad_sequence(x, batch_first=True)
    y_pad = torch.nn.utils.rnn.pad_sequence(y, batch_first=True)
    return x_pad, torch.tensor(x_lens), y_pad, torch.tensor(y_lens), z
    
dataset_conll2003_train_loader = DataLoader(
    MyDataset(dataset_conll2003_train),
    batch_size=32,
    shuffle=True, 
    collate_fn=lambda x: collate_fn_padd(x))


dataset_conll2003_test_loader = DataLoader(
    MyDataset(dataset_conll2003_test),
    batch_size=32,
    shuffle=True, 
    collate_fn=lambda x: collate_fn_padd(x))

In [14]:
next(iter(dataset_conll2003_train_loader))[4][0]

['17',
 '-',
 'Karina',
 'Habsudova',
 '(',
 'Slovakia',
 ')',
 'beat',
 'Nathalie',
 'Dechy',
 '(',
 'France',
 ')',
 '6-4',
 '6-2']

In [15]:
print(dataset_conll2003_train_loader.dataset.__len__(), dataset_conll2003_test_loader.dataset.__len__())

14041 3453


## 模型定义

In [38]:
class BiLSTM_CRF(nn.Module):

    def __init__(self, config=None):
        super(BiLSTM_CRF, self).__init__()
        self.config = config

        # BiLSTM-model 给 emission 层定义参数
        self.embedding_dim = self.config.get('embedding_dim', 200)
        self.hidden_dim = self.config.get('hidden_dim', 200)
        self.vocab_size = self.config.get('vocab_size', 30289)

        self.word_embeds = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.target_size = self.config.get('num_tags', 9)
        self.num_layers = self.config.get('num_layers',1)
        self.batch_size = self.config.get('batch_size',16)
        self.bidirectional = True

        # lstm
        self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_dim//2,
                            num_layers=self.num_layers, bidirectional=self.bidirectional)
        self.hidden2tag1 = nn.Linear(self.hidden_dim, self.target_size*3)
        self.hidden2tag2 = nn.Linear(self.target_size*3, self.target_size)
        self.dropout020 = nn.Dropout(0.2)
#         self.hidden_init = self.init_hidden()

        # CRF-model
        self.crf = CRF(self.config.get('num_tags', 9), batch_first=True)

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers*2 if self.bidirectional else self.num_layers,
                             self.batch_size, self.hidden_dim//2)
        cell = torch.zeros(self.num_layers*2 if self.bidirectional else self.num_layers,
                             self.batch_size, self.hidden_dim//2)
        return hidden, cell

    def forward(self, sent, sent_len):
        """

        :param sent: 输入的已转换为token_id的句子，(batch_len * sent_len * token_emb_len)
        :param sent_len: tensor(list(int))
        :return:
        """
        embeds = self.word_embeds(sent)
        embed_packed = pack_padded_sequence(embeds, lengths=sent_len.to('cpu'),
                                            batch_first=True,
                                            enforce_sorted=False)
        lstm_out, (hidden, cell) = self.lstm(embed_packed) #, self.hidden_init)
        lstm_out, lens = pad_packed_sequence(lstm_out, batch_first=True)
        tag_score = self.hidden2tag1(lstm_out)
        tag_score = self.dropout020(tag_score)
        tag_score = self.hidden2tag2(tag_score)
#         tag_score = nn.functional.softmax(tag_score, dim=-1)
        return tag_score

In [39]:
model_name = 'V1-Embrand200-bilstmLayer=1Hidden=200Dropout0.2Batch=32Learn=1e-1'
model_lstm = BiLSTM_CRF(config=Config)

In [40]:
model_lstm = model_lstm.cuda()

In [41]:
model_lstm.parameters

<bound method Module.parameters of BiLSTM_CRF(
  (word_embeds): Embedding(30289, 200)
  (lstm): LSTM(200, 50, bidirectional=True)
  (hidden2tag1): Linear(in_features=100, out_features=27, bias=True)
  (hidden2tag2): Linear(in_features=27, out_features=9, bias=True)
  (dropout020): Dropout(p=0.2, inplace=False)
  (crf): CRF(num_tags=9)
)>

In [42]:
# 手动计算验证权重
import collections
ner_tags_all = torch.cat(dataset_conll2003_train['ner_tags'])
t=collections.Counter(ner_tags_all.numpy())
res = []
for k in t:
    res.append((k, len(ner_tags_all)/9/t[k]))
print(res)

[(3, 3.5792683998664065), (0, 0.1334168085220698), (7, 6.580731691551936), (1, 3.4279629629629627), (2, 4.996589124460149), (5, 3.1687052598817305), (4, 6.108141348692104), (8, 19.58835978835979), (6, 19.554499183712664)]


In [43]:
class_weights=sklearn.utils.class_weight.compute_class_weight(
    class_weight='balanced',classes=np.unique(ner_tags_all),y=ner_tags_all.numpy())
class_weights=torch.tensor(class_weights,dtype=torch.float)
class_weights

tensor([ 0.1334,  3.4280,  4.9966,  3.5793,  6.1081,  3.1687, 19.5545,  6.5807,
        19.5884])

In [44]:
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=1e-1)
loss_fn = nn.CrossEntropyLoss(reduction='mean', weight=class_weights.cuda()) 
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [45]:
def train_eval_single_epoch(model, data_iter=None, optimizer=None, loss_fn=None, is_train=False):
    if is_train:
        model.train()
    else:
        model.eval()   
    correct_curr, correct_sum, loss_sum, loss_curr = 0, 0, 0, 0
    loss_list, accuracy_list = [], []
    print('Total (training) batch = {}'.format(len(data_iter)))
    batch_i = 0
    data_iter_len = data_iter.dataset.__len__() # total sample num.
    batch_num = len(data_iter)
    
    batch_loss_list = list()
    logits_list = list()
    y_list, y_len_list = [], []
    for batch_data in data_iter:
        torch.cuda.empty_cache()
        batch_i += 1
        if is_train:
            optimizer.zero_grad()
#             model.zero_grad()
        x, x_len, y, y_len, _ = batch_data
        x = x.cuda()
        x_len = x_len.cuda()
        y = y.cuda()
        # model predict 
        logits = model(x, x_len)
        assert logits.shape[0:2]==x.shape[0:2]
        # compute loss 
        batch_loss = 0
        for i in range(logits.size(0)): # num of samples in one batch
            loss = loss_fn(logits[i], y[i])
            batch_loss += loss 
        
        batch_loss /= logits.size(0)
        if is_train:
            batch_loss.backward()
            optimizer.step()
        
        # 记录
        batch_loss_list.append(batch_loss.item())
        logits_list.append(logits)
        y_list.append(y)
        y_len_list.append(y_len)
#         if batch_i%100==0:
#             print(batch_loss.item())
        
        x = x.cpu()
        x_len = x_len.cpu()
        y = y.cpu()
        
#     print(sum(batch_loss_list)/batch_num)
    return batch_loss_list, logits_list, y_list, y_len_list

In [46]:
def func_cal_accu_recall(logits_list=None, y_list=None, y_len_list=None):
    eval_dict = {'tp':0,'tn':0,'fp':0,'fn':0,'others':0,'n_total':0}
    n_total = 0
    for logit_i, logit in enumerate(logits_list):
        assert logit.shape[0:2] == y_list[logit_i].shape
        batch_len = len(y_len_list[logit_i])
        batch_max_seqlen = max(y_len_list[logit_i])
        y_curr = y_list[logit_i]
        logit_argmax = torch.argmax(logit, dim=2)

        # get mask matrix
        mask = torch.zeros((batch_len, batch_max_seqlen))
        for mask_i in range(mask.shape[0]):
            mask[mask_i][0:y_len_list[logit_i][mask_i]] = 1
        assert sum(mask.sum(axis=1)==y_len_list[logit_i])//batch_len==1

        # cal the tp,tn,fp,fn in this batch
        N = mask.sum()
        TP = ((logit_argmax>0)*(y_curr>0)*(logit_argmax==y_curr)*mask.cuda()).sum()
        TN = ((logit_argmax==0)*(y_curr==0)*(logit_argmax==y_curr)*mask.cuda()).sum()
        FP = ((logit_argmax>0)*(y_curr==0)*(logit_argmax!=y_curr)*mask.cuda()).sum()
        FN = ((logit_argmax==0)*(y_curr>0)*(logit_argmax!=y_curr)*mask.cuda()).sum()
        others = ((logit_argmax>0)*(y_curr>0)*(logit_argmax!=y_curr)*mask.cuda()).sum()

        eval_dict['tp'] += TP.item()
        eval_dict['tn'] += TN.item()
        eval_dict['fp'] += FP.item()
        eval_dict['fn'] += FN.item()
        eval_dict['others'] += others.item()
        eval_dict['n_total'] += N.item()

        accu = (TP+TN) / N
        recall = TP / (TP + FN)
#         print(accu, recall)
    
    if False:
        print('Total accu = {:.2f}% recall = {:.2f}%'.format(
            (eval_dict['tp'] + eval_dict['tn'])/eval_dict['n_total']*100, 
            eval_dict['tp']/(eval_dict['tp'] + eval_dict['fn'])*100))
    return eval_dict

In [47]:
def func_eval(model, data_iter=None, loss_fn=None):
    model.eval()
    correct_curr, correct_sum, loss_sum, loss_curr = 0, 0, 0, 0
    loss_list, accuracy_list = [], []
    print('Total (training) batch = {}'.format(len(data_iter)))
    batch_i = 0
    data_iter_len = data_iter.dataset.__len__() # total sample num.
    batch_num = len(data_iter)
    
    batch_loss_list = list()
    logits_list = list()
    y_list, y_len_list = [], []
    for batch_data in data_iter:
        batch_i += 1
        x, x_len, y, y_len,_ = batch_data
        x = x.cuda()
        x_len = x_len.cuda()
        y = y.cuda()
        # model predict 
        logits = model(x, x_len)
        assert logits.shape[0:2]==x.shape[0:2]
        # compute loss 
        batch_loss = 0
        for i in range(logits.size(0)): # num of samples in one batch
            loss = loss_fn(logits[i], y[i])
            batch_loss += loss 
        
        batch_loss /= logits.size(0)
        
        # 记录
        batch_loss_list.append(batch_loss.item())
        logits_list.append(logits)
        y_list.append(y)
        y_len_list.append(y_len)
#         if batch_i%100==0:
#             print(batch_loss.item())
        
        x = x.cpu()
        x_len = x_len.cpu()
        y = y.cpu()
        
#     print(sum(batch_loss_list)/batch_num)
    return batch_loss_list, logits_list, y_list, y_len_list

## 训练开始

In [48]:
F1_test_max, F1_test_curr = 0, 0

for epoch in tqdm(range(50)):
    print('='*50)
    print('Epoch = {}'.format(epoch))
    print('='*50)
    batch_loss_list, logits_list, y_list, y_len_list = train_eval_single_epoch(model_lstm, 
                                                                               dataset_conll2003_train_loader, 
                                                                               optimizer=optimizer,
                                                                               loss_fn=loss_fn,
                                                                               is_train=True)
    scheduler.step() # 加上后好一些
    print('training loss = {:.8f}'.format(sum(batch_loss_list)/len(batch_loss_list)))
    test_batch_loss_list, test_logits_list, test_y_list, test_y_len_list = func_eval(model_lstm, 
                                                                               dataset_conll2003_test_loader, 
                                                                               loss_fn=loss_fn)
    print('testing loss = {:.8f}'.format(sum(test_batch_loss_list)/len(test_batch_loss_list)))
    
    # 评估
    train_dict = func_cal_accu_recall(logits_list=logits_list, y_list=y_list, y_len_list=y_len_list)
    test_dict = func_cal_accu_recall(logits_list=test_logits_list, y_list=test_y_list, y_len_list=test_y_len_list)
    
    print('Train :Total accu = {:.2f}% recall = {:.2f}%'.format(
            (train_dict['tp'] + train_dict['tn'])/train_dict['n_total']*100, 
            train_dict['tp']/(train_dict['tp'] + train_dict['fn'])*100))
    
    print('Test :Total accu = {:.2f}% recall = {:.2f}%'.format(
            (test_dict['tp'] + test_dict['tn'])/test_dict['n_total']*100, 
            test_dict['tp']/(test_dict['tp'] + test_dict['fn'])*100))
    
    # save model if current f1 rate is better than previous ones 
    accu_test = (test_dict['tp'] + test_dict['tn'])/test_dict['n_total']
    recall_test = test_dict['tp']/(test_dict['tp'] + test_dict['fn'])
    F1_test_curr = 1/(1/accu_test+1/recall_test)
    if F1_test_curr>F1_test_max:
        torch.save(model_lstm.state_dict(), './models/model_v1_'+model_name+'epoch='+str(epoch)+
                   'accu='+str(round(accu_test,4))+
                   'recall='+str(round(recall_test,4))+
                   'F1='+str(round(F1_test_curr,4)))
    F1_test_max = max(F1_test_curr, F1_test_max)

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch = 0
Total (training) batch = 439
training loss = 1.47737321
Total (training) batch = 108
testing loss = 1.32306615


  from ipykernel import kernelapp as app
  2%|▏         | 1/50 [00:14<11:54, 14.58s/it]

Train :Total accu = 61.93% recall = 54.59%
Test :Total accu = 66.42% recall = 51.47%
Epoch = 1
Total (training) batch = 439
training loss = 1.34920077
Total (training) batch = 108
testing loss = 1.30786751


  4%|▍         | 2/50 [00:29<11:38, 14.54s/it]

Train :Total accu = 66.65% recall = 62.79%
Test :Total accu = 66.44% recall = 60.12%
Epoch = 2
Total (training) batch = 439
training loss = 1.22810090
Total (training) batch = 108
testing loss = 1.23127040


  6%|▌         | 3/50 [00:43<11:19, 14.46s/it]

Train :Total accu = 69.19% recall = 68.70%
Test :Total accu = 66.43% recall = 59.80%
Epoch = 3
Total (training) batch = 439
training loss = 1.12699140
Total (training) batch = 108
testing loss = 1.17240155


  8%|▊         | 4/50 [00:58<11:06, 14.50s/it]

Train :Total accu = 71.50% recall = 74.49%
Test :Total accu = 70.05% recall = 69.99%
Epoch = 4
Total (training) batch = 439
training loss = 1.02834336
Total (training) batch = 108
testing loss = 1.11882982


 10%|█         | 5/50 [01:12<10:46, 14.37s/it]

Train :Total accu = 74.71% recall = 79.21%
Test :Total accu = 67.13% recall = 78.09%
Epoch = 5
Total (training) batch = 439
training loss = 0.95345893
Total (training) batch = 108
testing loss = 1.17005050


 12%|█▏        | 6/50 [01:26<10:31, 14.35s/it]

Train :Total accu = 76.58% recall = 82.20%
Test :Total accu = 71.55% recall = 70.41%
Epoch = 6
Total (training) batch = 439
training loss = 0.88314902
Total (training) batch = 108
testing loss = 1.08234139


 14%|█▍        | 7/50 [01:41<10:21, 14.46s/it]

Train :Total accu = 77.93% recall = 84.36%
Test :Total accu = 73.02% recall = 75.17%
Epoch = 7
Total (training) batch = 439
training loss = 0.81084800
Total (training) batch = 108
testing loss = 1.05042877


 16%|█▌        | 8/50 [01:55<10:07, 14.47s/it]

Train :Total accu = 79.87% recall = 86.62%
Test :Total accu = 73.65% recall = 77.84%
Epoch = 8
Total (training) batch = 439
training loss = 0.76767864
Total (training) batch = 108
testing loss = 1.06337513


 18%|█▊        | 9/50 [02:10<09:55, 14.52s/it]

Train :Total accu = 81.03% recall = 88.58%
Test :Total accu = 75.25% recall = 79.45%
Epoch = 9
Total (training) batch = 439
training loss = 0.71411565
Total (training) batch = 108
testing loss = 1.00159833


 20%|██        | 10/50 [02:24<09:39, 14.49s/it]

Train :Total accu = 82.21% recall = 89.69%
Test :Total accu = 76.57% recall = 79.34%
Epoch = 10
Total (training) batch = 439
training loss = 0.65646292
Total (training) batch = 108
testing loss = 1.02983023


 22%|██▏       | 11/50 [02:39<09:24, 14.47s/it]

Train :Total accu = 84.02% recall = 91.27%
Test :Total accu = 78.51% recall = 78.05%
Epoch = 11
Total (training) batch = 439
training loss = 0.61907423
Total (training) batch = 108
testing loss = 0.99718606


 24%|██▍       | 12/50 [02:53<09:07, 14.41s/it]

Train :Total accu = 85.03% recall = 92.30%
Test :Total accu = 77.18% recall = 83.73%
Epoch = 12
Total (training) batch = 439
training loss = 0.57020208
Total (training) batch = 108
testing loss = 0.98591107


 26%|██▌       | 13/50 [03:07<08:50, 14.33s/it]

Train :Total accu = 86.03% recall = 93.30%
Test :Total accu = 77.75% recall = 84.17%
Epoch = 13
Total (training) batch = 439
training loss = 0.53250571
Total (training) batch = 108
testing loss = 0.97710483


 28%|██▊       | 14/50 [03:21<08:35, 14.31s/it]

Train :Total accu = 87.08% recall = 93.95%
Test :Total accu = 78.37% recall = 84.52%
Epoch = 14
Total (training) batch = 439
training loss = 0.49239215
Total (training) batch = 108
testing loss = 0.96692959


 30%|███       | 15/50 [03:35<08:18, 14.25s/it]

Train :Total accu = 87.90% recall = 94.76%
Test :Total accu = 80.09% recall = 83.98%
Epoch = 15
Total (training) batch = 439
training loss = 0.46029764
Total (training) batch = 108
testing loss = 0.99440030


 32%|███▏      | 16/50 [03:49<08:02, 14.18s/it]

Train :Total accu = 88.90% recall = 95.46%
Test :Total accu = 80.37% recall = 83.23%
Epoch = 16
Total (training) batch = 439
training loss = 0.42833661
Total (training) batch = 108
testing loss = 1.00863572


 34%|███▍      | 17/50 [04:04<07:47, 14.16s/it]

Train :Total accu = 89.64% recall = 95.84%
Test :Total accu = 82.22% recall = 82.01%
Epoch = 17
Total (training) batch = 439
training loss = 0.39981317
Total (training) batch = 108
testing loss = 0.95762105


 36%|███▌      | 18/50 [04:18<07:36, 14.26s/it]

Train :Total accu = 90.46% recall = 96.28%
Test :Total accu = 81.88% recall = 84.73%
Epoch = 18
Total (training) batch = 439
training loss = 0.37458333
Total (training) batch = 108
testing loss = 0.99863272


 38%|███▊      | 19/50 [04:33<07:24, 14.34s/it]

Train :Total accu = 90.81% recall = 96.53%
Test :Total accu = 80.82% recall = 86.91%
Epoch = 19
Total (training) batch = 439
training loss = 0.35741328
Total (training) batch = 108
testing loss = 0.99089279


 40%|████      | 20/50 [04:47<07:10, 14.36s/it]

Train :Total accu = 91.33% recall = 96.94%
Test :Total accu = 81.77% recall = 85.50%
Epoch = 20
Total (training) batch = 439
training loss = 0.33343834
Total (training) batch = 108
testing loss = 0.99190164


 42%|████▏     | 21/50 [05:01<06:55, 14.32s/it]

Train :Total accu = 91.96% recall = 97.18%
Test :Total accu = 82.25% recall = 86.08%
Epoch = 21
Total (training) batch = 439
training loss = 0.31725261
Total (training) batch = 108
testing loss = 0.99123073


 44%|████▍     | 22/50 [05:15<06:40, 14.30s/it]

Train :Total accu = 92.31% recall = 97.40%
Test :Total accu = 82.63% recall = 86.58%
Epoch = 22
Total (training) batch = 439
training loss = 0.30166959
Total (training) batch = 108
testing loss = 0.99681492


 46%|████▌     | 23/50 [05:30<06:27, 14.36s/it]

Train :Total accu = 92.72% recall = 97.67%
Test :Total accu = 82.39% recall = 86.51%
Epoch = 23
Total (training) batch = 439
training loss = 0.29028601
Total (training) batch = 108
testing loss = 1.02036545


 48%|████▊     | 24/50 [05:44<06:13, 14.36s/it]

Train :Total accu = 93.13% recall = 97.76%
Test :Total accu = 83.06% recall = 85.19%
Epoch = 24
Total (training) batch = 439
training loss = 0.28066212
Total (training) batch = 108
testing loss = 1.01410912


 50%|█████     | 25/50 [05:59<06:01, 14.47s/it]

Train :Total accu = 93.34% recall = 97.94%
Test :Total accu = 82.92% recall = 86.75%
Epoch = 25
Total (training) batch = 439
training loss = 0.26817774
Total (training) batch = 108
testing loss = 1.03993276


 52%|█████▏    | 26/50 [06:13<05:46, 14.44s/it]

Train :Total accu = 93.57% recall = 98.05%
Test :Total accu = 83.66% recall = 85.38%
Epoch = 26
Total (training) batch = 439
training loss = 0.26160829
Total (training) batch = 108
testing loss = 1.02681889


 54%|█████▍    | 27/50 [06:28<05:31, 14.43s/it]

Train :Total accu = 93.80% recall = 98.10%
Test :Total accu = 82.72% recall = 86.86%
Epoch = 27
Total (training) batch = 439
training loss = 0.25630625
Total (training) batch = 108
testing loss = 1.03793268


 56%|█████▌    | 28/50 [06:43<05:19, 14.53s/it]

Train :Total accu = 93.91% recall = 98.18%
Test :Total accu = 83.43% recall = 85.81%
Epoch = 28
Total (training) batch = 439
training loss = 0.25223152
Total (training) batch = 108
testing loss = 1.02639717


 58%|█████▊    | 29/50 [06:57<05:04, 14.50s/it]

Train :Total accu = 94.04% recall = 98.11%
Test :Total accu = 83.13% recall = 86.22%
Epoch = 29
Total (training) batch = 439
training loss = 0.24762546
Total (training) batch = 108
testing loss = 1.01222943


 60%|██████    | 30/50 [07:11<04:47, 14.40s/it]

Train :Total accu = 94.17% recall = 98.23%
Test :Total accu = 83.42% recall = 86.11%
Epoch = 30
Total (training) batch = 439
training loss = 0.24440253
Total (training) batch = 108
testing loss = 1.01289203


 62%|██████▏   | 31/50 [07:25<04:32, 14.33s/it]

Train :Total accu = 94.23% recall = 98.24%
Test :Total accu = 82.98% recall = 87.11%
Epoch = 31
Total (training) batch = 439
training loss = 0.24133764
Total (training) batch = 108
testing loss = 1.02579959


 64%|██████▍   | 32/50 [07:40<04:17, 14.33s/it]

Train :Total accu = 94.35% recall = 98.30%
Test :Total accu = 83.04% recall = 87.35%
Epoch = 32
Total (training) batch = 439
training loss = 0.24006433
Total (training) batch = 108
testing loss = 1.00970561


 66%|██████▌   | 33/50 [07:54<04:03, 14.30s/it]

Train :Total accu = 94.32% recall = 98.31%
Test :Total accu = 82.58% recall = 87.40%
Epoch = 33
Total (training) batch = 439
training loss = 0.23628964
Total (training) batch = 108
testing loss = 1.01471566


 68%|██████▊   | 34/50 [08:08<03:48, 14.27s/it]

Train :Total accu = 94.40% recall = 98.33%
Test :Total accu = 83.24% recall = 87.24%
Epoch = 34
Total (training) batch = 439
training loss = 0.23502508
Total (training) batch = 108
testing loss = 1.02425628


 70%|███████   | 35/50 [08:22<03:34, 14.31s/it]

Train :Total accu = 94.43% recall = 98.36%
Test :Total accu = 83.37% recall = 86.88%
Epoch = 35
Total (training) batch = 439
training loss = 0.23397048
Total (training) batch = 108
testing loss = 1.03035467


 72%|███████▏  | 36/50 [08:37<03:20, 14.35s/it]

Train :Total accu = 94.53% recall = 98.45%
Test :Total accu = 83.34% recall = 87.24%
Epoch = 36
Total (training) batch = 439
training loss = 0.23261768
Total (training) batch = 108
testing loss = 1.02608568


 74%|███████▍  | 37/50 [08:51<03:06, 14.31s/it]

Train :Total accu = 94.50% recall = 98.40%
Test :Total accu = 83.69% recall = 86.61%
Epoch = 37
Total (training) batch = 439
training loss = 0.23222153
Total (training) batch = 108
testing loss = 1.02450330


 76%|███████▌  | 38/50 [09:06<02:52, 14.36s/it]

Train :Total accu = 94.58% recall = 98.36%
Test :Total accu = 83.12% recall = 87.79%
Epoch = 38
Total (training) batch = 439
training loss = 0.23077605
Total (training) batch = 108
testing loss = 1.02290576


 78%|███████▊  | 39/50 [09:20<02:38, 14.39s/it]

Train :Total accu = 94.55% recall = 98.38%
Test :Total accu = 83.43% recall = 87.09%
Epoch = 39
Total (training) batch = 439
training loss = 0.22956086
Total (training) batch = 108
testing loss = 1.02087555


 80%|████████  | 40/50 [09:35<02:24, 14.43s/it]

Train :Total accu = 94.60% recall = 98.47%
Test :Total accu = 83.58% recall = 86.91%
Epoch = 40
Total (training) batch = 439
training loss = 0.22827564
Total (training) batch = 108
testing loss = 1.02821747


 82%|████████▏ | 41/50 [09:49<02:08, 14.31s/it]

Train :Total accu = 94.69% recall = 98.45%
Test :Total accu = 83.23% recall = 87.45%
Epoch = 41
Total (training) batch = 439
training loss = 0.22680441
Total (training) batch = 108
testing loss = 1.03493674


 84%|████████▍ | 42/50 [10:03<01:54, 14.36s/it]

Train :Total accu = 94.62% recall = 98.43%
Test :Total accu = 83.50% recall = 87.10%
Epoch = 42
Total (training) batch = 439
training loss = 0.22666198
Total (training) batch = 108
testing loss = 1.02474518


 86%|████████▌ | 43/50 [10:18<01:40, 14.37s/it]

Train :Total accu = 94.60% recall = 98.41%
Test :Total accu = 83.53% recall = 86.90%
Epoch = 43
Total (training) batch = 439
training loss = 0.22559338
Total (training) batch = 108
testing loss = 1.02921211


 88%|████████▊ | 44/50 [10:32<01:26, 14.36s/it]

Train :Total accu = 94.63% recall = 98.41%
Test :Total accu = 83.54% recall = 87.03%
Epoch = 44
Total (training) batch = 439
training loss = 0.22520625
Total (training) batch = 108
testing loss = 1.02518640


 90%|█████████ | 45/50 [10:46<01:11, 14.36s/it]

Train :Total accu = 94.69% recall = 98.43%
Test :Total accu = 83.30% recall = 87.46%
Epoch = 45
Total (training) batch = 439
training loss = 0.22203133
Total (training) batch = 108
testing loss = 1.02706055


 92%|█████████▏| 46/50 [11:01<00:57, 14.37s/it]

Train :Total accu = 94.69% recall = 98.54%
Test :Total accu = 83.53% recall = 87.08%
Epoch = 46
Total (training) batch = 439
training loss = 0.22351809
Total (training) batch = 108
testing loss = 1.02710268


 94%|█████████▍| 47/50 [11:15<00:43, 14.34s/it]

Train :Total accu = 94.72% recall = 98.47%
Test :Total accu = 83.41% recall = 87.14%
Epoch = 47
Total (training) batch = 439
training loss = 0.22167528
Total (training) batch = 108
testing loss = 1.03393611


 96%|█████████▌| 48/50 [11:29<00:28, 14.32s/it]

Train :Total accu = 94.74% recall = 98.45%
Test :Total accu = 83.34% recall = 87.36%
Epoch = 48
Total (training) batch = 439
training loss = 0.22260550
Total (training) batch = 108
testing loss = 1.03732993


 98%|█████████▊| 49/50 [11:43<00:14, 14.32s/it]

Train :Total accu = 94.70% recall = 98.49%
Test :Total accu = 83.47% recall = 87.20%
Epoch = 49
Total (training) batch = 439
training loss = 0.22193933
Total (training) batch = 108
testing loss = 1.02778368


100%|██████████| 50/50 [11:58<00:00, 14.37s/it]

Train :Total accu = 94.74% recall = 98.46%
Test :Total accu = 83.58% recall = 87.03%





In [50]:
for epoch in tqdm(range(50,70)):
    print('='*50)
    print('Epoch = {}'.format(epoch))
    print('='*50)
    batch_loss_list, logits_list, y_list, y_len_list = train_eval_single_epoch(model_lstm, 
                                                                               dataset_conll2003_train_loader, 
                                                                               optimizer=optimizer,
                                                                               loss_fn=loss_fn,
                                                                               is_train=True)
    scheduler.step() # 加上后好一些
    print('training loss = {:.8f}'.format(sum(batch_loss_list)/len(batch_loss_list)))
    test_batch_loss_list, test_logits_list, test_y_list, test_y_len_list = func_eval(model_lstm, 
                                                                               dataset_conll2003_test_loader, 
                                                                               loss_fn=loss_fn)
    print('testing loss = {:.8f}'.format(sum(test_batch_loss_list)/len(test_batch_loss_list)))
    
    # 评估
    train_dict = func_cal_accu_recall(logits_list=logits_list, y_list=y_list, y_len_list=y_len_list)
    test_dict = func_cal_accu_recall(logits_list=test_logits_list, y_list=test_y_list, y_len_list=test_y_len_list)
    
    print('Train :Total accu = {:.2f}% recall = {:.2f}%'.format(
            (train_dict['tp'] + train_dict['tn'])/train_dict['n_total']*100, 
            train_dict['tp']/(train_dict['tp'] + train_dict['fn'])*100))
    
    print('Test :Total accu = {:.2f}% recall = {:.2f}%'.format(
            (test_dict['tp'] + test_dict['tn'])/test_dict['n_total']*100, 
            test_dict['tp']/(test_dict['tp'] + test_dict['fn'])*100))
    
    # save model if current f1 rate is better than previous ones 
    accu_test = (test_dict['tp'] + test_dict['tn'])/test_dict['n_total']
    recall_test = test_dict['tp']/(test_dict['tp'] + test_dict['fn'])
    F1_test_curr = 1/(1/accu_test+1/recall_test)
    if F1_test_curr>F1_test_max:
        torch.save(model_lstm.state_dict(), './models/model_v1_'+model_name+'epoch='+str(epoch)+
                   'accu='+str(round(accu_test,4))+
                   'recall='+str(round(recall_test,4))+
                   'F1='+str(round(F1_test_curr,4)))
    F1_test_max = max(F1_test_curr, F1_test_max)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch = 50
Total (training) batch = 439
training loss = 0.22028840
Total (training) batch = 108
testing loss = 1.03339294


  from ipykernel import kernelapp as app
  5%|▌         | 1/20 [00:14<04:39, 14.70s/it]

Train :Total accu = 94.76% recall = 98.45%
Test :Total accu = 83.41% recall = 87.19%
Epoch = 51
Total (training) batch = 439
training loss = 0.21958178
Total (training) batch = 108
testing loss = 1.04335617


 10%|█         | 2/20 [00:29<04:22, 14.61s/it]

Train :Total accu = 94.73% recall = 98.49%
Test :Total accu = 83.55% recall = 86.95%
Epoch = 52
Total (training) batch = 439
training loss = 0.21985352
Total (training) batch = 108
testing loss = 1.03043874


 15%|█▌        | 3/20 [00:43<04:07, 14.57s/it]

Train :Total accu = 94.71% recall = 98.51%
Test :Total accu = 83.43% recall = 87.15%
Epoch = 53
Total (training) batch = 439
training loss = 0.22113321
Total (training) batch = 108
testing loss = 1.03107773


 20%|██        | 4/20 [00:58<03:51, 14.50s/it]

Train :Total accu = 94.71% recall = 98.52%
Test :Total accu = 83.60% recall = 86.88%
Epoch = 54
Total (training) batch = 439
training loss = 0.22038740
Total (training) batch = 108
testing loss = 1.03071490


 25%|██▌       | 5/20 [01:12<03:36, 14.44s/it]

Train :Total accu = 94.79% recall = 98.48%
Test :Total accu = 83.50% recall = 86.95%
Epoch = 55
Total (training) batch = 439
training loss = 0.22095717
Total (training) batch = 108
testing loss = 1.03156417


 30%|███       | 6/20 [01:26<03:20, 14.35s/it]

Train :Total accu = 94.80% recall = 98.50%
Test :Total accu = 83.45% recall = 87.14%
Epoch = 56
Total (training) batch = 439
training loss = 0.22138221
Total (training) batch = 108
testing loss = 1.03338755


 35%|███▌      | 7/20 [01:41<03:06, 14.37s/it]

Train :Total accu = 94.77% recall = 98.43%
Test :Total accu = 83.43% recall = 87.24%
Epoch = 57
Total (training) batch = 439
training loss = 0.21964724
Total (training) batch = 108
testing loss = 1.02886947


 40%|████      | 8/20 [01:55<02:53, 14.49s/it]

Train :Total accu = 94.78% recall = 98.46%
Test :Total accu = 83.53% recall = 87.05%
Epoch = 58
Total (training) batch = 439
training loss = 0.21924993
Total (training) batch = 108
testing loss = 1.03327863


 45%|████▌     | 9/20 [02:10<02:39, 14.49s/it]

Train :Total accu = 94.80% recall = 98.50%
Test :Total accu = 83.53% recall = 87.01%
Epoch = 59
Total (training) batch = 439
training loss = 0.22017605
Total (training) batch = 108
testing loss = 1.02911192


 50%|█████     | 10/20 [02:25<02:25, 14.57s/it]

Train :Total accu = 94.80% recall = 98.49%
Test :Total accu = 83.46% recall = 87.16%
Epoch = 60
Total (training) batch = 439
training loss = 0.21954502
Total (training) batch = 108
testing loss = 1.03007094


 55%|█████▌    | 11/20 [02:39<02:11, 14.64s/it]

Train :Total accu = 94.77% recall = 98.49%
Test :Total accu = 83.48% recall = 87.09%
Epoch = 61
Total (training) batch = 439
training loss = 0.21803828
Total (training) batch = 108
testing loss = 1.03711423


 60%|██████    | 12/20 [02:54<01:56, 14.58s/it]

Train :Total accu = 94.75% recall = 98.45%
Test :Total accu = 83.49% recall = 87.20%
Epoch = 62
Total (training) batch = 439
training loss = 0.21870519
Total (training) batch = 108
testing loss = 1.02511061


 65%|██████▌   | 13/20 [03:08<01:41, 14.50s/it]

Train :Total accu = 94.80% recall = 98.52%
Test :Total accu = 83.50% recall = 87.09%
Epoch = 63
Total (training) batch = 439
training loss = 0.21957169
Total (training) batch = 108
testing loss = 1.02933003


 70%|███████   | 14/20 [03:22<01:26, 14.43s/it]

Train :Total accu = 94.83% recall = 98.53%
Test :Total accu = 83.50% recall = 87.03%
Epoch = 64
Total (training) batch = 439
training loss = 0.21881127
Total (training) batch = 108
testing loss = 1.03341302


 75%|███████▌  | 15/20 [03:37<01:12, 14.44s/it]

Train :Total accu = 94.81% recall = 98.50%
Test :Total accu = 83.50% recall = 87.02%
Epoch = 65
Total (training) batch = 439
training loss = 0.21914673
Total (training) batch = 108
testing loss = 1.03151531


 80%|████████  | 16/20 [03:52<00:58, 14.58s/it]

Train :Total accu = 94.79% recall = 98.46%
Test :Total accu = 83.45% recall = 87.11%
Epoch = 66
Total (training) batch = 439
training loss = 0.21696893
Total (training) batch = 108
testing loss = 1.03725075


 85%|████████▌ | 17/20 [04:06<00:43, 14.58s/it]

Train :Total accu = 94.83% recall = 98.52%
Test :Total accu = 83.52% recall = 87.06%
Epoch = 67
Total (training) batch = 439
training loss = 0.21733979
Total (training) batch = 108
testing loss = 1.03242237


 90%|█████████ | 18/20 [04:21<00:29, 14.59s/it]

Train :Total accu = 94.87% recall = 98.45%
Test :Total accu = 83.46% recall = 87.13%
Epoch = 68
Total (training) batch = 439
training loss = 0.21888341
Total (training) batch = 108
testing loss = 1.03608008


 95%|█████████▌| 19/20 [04:36<00:14, 14.57s/it]

Train :Total accu = 94.74% recall = 98.51%
Test :Total accu = 83.48% recall = 87.07%
Epoch = 69
Total (training) batch = 439
training loss = 0.21793188
Total (training) batch = 108
testing loss = 1.02967578


100%|██████████| 20/20 [04:50<00:00, 14.51s/it]

Train :Total accu = 94.86% recall = 98.47%
Test :Total accu = 83.49% recall = 87.06%





## 训练总结

能发现验证集在 21 epoch 时已达到最好状态，之后train开始过拟合，eval 也没办法继续提升准确率。

=====V0=====

epoch=0
- Train :Total accu = 68.88% recall = 73.88%
- Test :Total accu = 66.67% recall = 73.10%

epoch=10
- Train :Total accu = 88.65% recall = 95.18%
- Test :Total accu = 79.97% recall = 86.82%

epoch=21
- Train :Total accu = 94.92% recall = 98.48%
- Test :Total accu = 83.26% recall = 90.43%

epoch=50
- Train :Total accu = 96.36% recall = 99.08%
- Test :Total accu = 83.81% recall = 90.54%

=====V1=====