## BERT

In [3]:
import copy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils import data
import tensorflow as tf
from tqdm import tqdm,trange
from transformers import *
# from ner_eval import classification_report,csv_report,f1_score
from seqeval.scheme import IOB2
from seqeval.metrics import classification_report,f1_score

from sklearn.model_selection import KFold

In [5]:
data_train = pd.read_csv("IOB2_Data/BCSMM4H_train_IOB2_all.txt",sep = '\t')
data_dev = pd.read_csv("IOB2_Data/BC_dev_IOB2_all.txt",sep = '\t')

In [6]:
data_train = data_train.fillna('NA')
data_dev = data_dev.fillna('NA')

In [7]:
class SentenceGetter(object):
    
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w, t) for w, t in zip(s["Word"].values.tolist(),
                                                     s["Tag"].values.tolist())]
        self.grouped = self.data.groupby("Sentence#").apply(agg_func)
        self.sentences = [s for s in self.grouped]
    
    def get_next(self):
        try:
            s = self.grouped[self.n_sent]
            self.n_sent += 1
            return s
        except:
            return None

In [8]:
getter = SentenceGetter(data_train)
dev_getter = SentenceGetter(data_dev)

In [9]:
sentences = [[word[0] for word in sentence] for sentence in getter.sentences]
dev_sentences = [[word[0] for word in sentence] for sentence in dev_getter.sentences]

In [10]:
labels = [[s[1] for s in sent] for sent in getter.sentences]
dev_labels = [[s[1] for s in sent] for sent in dev_getter.sentences]

In [11]:
tag_values = list(set(data_train["Tag"].values))
tag_values.append("PAD")
tag_values

['O', 'B-', 'I-', 'PAD']

In [12]:
bs = 16
model_ty = 'bert-base-cased'
# model_ty = 'bert-base-multilingual-cased'
# tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
tokenizer = BertTokenizer.from_pretrained(model_ty, do_lower_case=False)

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [13]:
def tokenize_and_preserve_labels(sentence, text_labels):
    tokenized_sentence = []
    labels = []

    for word, label in zip(sentence, text_labels):
        tokenized_word = tokenizer.tokenize(word)
        n_subwords = len(tokenized_word)

        tokenized_sentence.extend(tokenized_word)
        if n_subwords > 1 and 'B-' in label:
            labels.extend([label])
            _ = 'I-' + label.split('B-')[1]
            if _ not in tag_values:
                tag_values.append(_)
                print(_)
            labels.extend([_] * (n_subwords-1))
        else:
            labels.extend([label] * n_subwords)
    return tokenized_sentence, labels

In [14]:
tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(sentences, labels)
]
print('done')
dev_tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(dev_sentences, dev_labels)
]

done


In [15]:
tag2idx = {t: i for i, t in enumerate(tag_values)}
tag2idx

{'O': 0, 'B-': 1, 'I-': 2, 'PAD': 3}

In [16]:
tokenized_texts = [token_label_pair[0] for token_label_pair in tokenized_texts_and_labels]
tokenized_labels = [token_label_pair[1] for token_label_pair in tokenized_texts_and_labels]

dev_tokenized_texts = [token_label_pair[0] for token_label_pair in dev_tokenized_texts_and_labels]
dev_tokenized_labels = [token_label_pair[1] for token_label_pair in dev_tokenized_texts_and_labels]

In [17]:
def check_post_entity(pre_tag,tag):
    error_entity = False
    if pre_tag == 'O' and 'I-' in tag:
        error_entity = True
    if error_entity:
        print(idx+1)
        print(pre_tag,tag)
def inspect_label(lal):
    for n,_ in enumerate(lal):
        if n != 0:
            check_post_entity(lal[n-1],lal[n])
for idx,t_label in enumerate(dev_tokenized_labels):
    inspect_label(t_label)

In [18]:
idx = 3
print(dev_sentences[idx])
print(dev_labels[idx])
print(dev_tokenized_texts[idx],len(dev_tokenized_texts[idx]))
print(dev_tokenized_labels[idx],len(dev_tokenized_labels[idx]))

['Bruh', 'do', 'you', 'wan', 'na', 'fight', 'nah', 'man', 'it', "'s", 'problem']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['B', '##ru', '##h', 'do', 'you', 'wa', '##n', 'na', 'fight', 'na', '##h', 'man', 'it', "'", 's', 'problem'] 16
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] 16


In [19]:
input_ids = [tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts]
dev_input_ids = [tokenizer.convert_tokens_to_ids(txt) for txt in dev_tokenized_texts]
tags = [[tag2idx.get(l) for l in lab] for lab in tokenized_labels]
dev_tags = [[tag2idx.get(l,tag2idx['O']) for l in lab] for lab in dev_tokenized_labels]

In [20]:
from torch.utils.data import Dataset
class BertNerDataset(Dataset):
    def __init__(self,sentences,labels, word_pad_idx, tag_pad_idx, max_len = 500):
        self.sentences = sentences
        self.labels = labels
        self.word_pad_idx = word_pad_idx
        self.tag_pad_idx = tag_pad_idx
        self.max_len = max_len
    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, index):
        return (self.sentences[index],self.labels[index])
        
    def collate_fn(self, datasets):
        sentences = [dataset[0] for dataset in datasets]
        labels = [dataset[1] for dataset in datasets]
        max_sent = max([len(data) for data in sentences])
        max_len = max([min(len(sentence), self.max_len) for sentence in sentences])
        pad_sentence = []
        pad_label = []
        for sentence,label in zip(sentences,labels):
            
            if len(sentence) > max_len:
#                 print('asd')
                pad_sentence.append(sentence[:max_len])
                pad_label.append(label[:max_len])
                attention_masks = [[float(i != 0.0) for i in ii] for ii in pad_sentence]
            else:
#                 print('zxc')
                pad_sentence.append(sentence+[self.word_pad_idx]*(max_len-len(sentence)))
                pad_label.append(label+[self.tag_pad_idx]*(max_len-len(label)))
                attention_masks = [[float(i != 0.0) for i in ii] for ii in pad_sentence]
        return torch.LongTensor(pad_sentence), torch.LongTensor(pad_label),torch.tensor(attention_masks)

## 使用 trainingset+validset 去做 10-fold CV

In [21]:
from torch.utils.data import TensorDataset, DataLoader , SubsetRandomSampler ,ConcatDataset,Sampler
from transformers import get_linear_schedule_with_warmup
bs = 32
k_folds = 10
kfold = KFold(n_splits=k_folds, shuffle=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 10
max_grad_norm = 3.0

tr_dataset = BertNerDataset(input_ids,tags,tokenizer.convert_tokens_to_ids('[PAD]'),tag2idx['PAD'])
va_dataset = BertNerDataset(dev_input_ids,dev_tags,tokenizer.convert_tokens_to_ids('[PAD]'),tag2idx['PAD'])
dataset = ConcatDataset([tr_dataset, va_dataset])

In [22]:
All_Fold_score = []
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    print(f'FOLD {fold}')
    print('--------------------------------')
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
    train_dataloader = DataLoader(dataset, batch_size=bs,
                                  collate_fn=tr_dataset.collate_fn,
                                  sampler=train_subsampler)
    valid_dataloader = DataLoader(dataset, batch_size=bs,
                                  collate_fn=va_dataset.collate_fn,
                                  sampler=test_subsampler)

    total_steps = len(train_dataloader) * epochs
    model = BertForTokenClassification.from_pretrained(
        model_ty,
        num_labels=len(tag2idx),
        output_attentions = False,
        output_hidden_states = False
    )
    optimizer = AdamW(model.parameters(),lr=3e-5,eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )
    model.cuda()
    all_loader = {
    "train" : train_dataloader,
    "valid" : valid_dataloader,
    }
    Fold_score = []
    for _ in trange(epochs, desc="Epoch"):
        for loader in all_loader:
            total_loss = 0
            # Training loop
            predictions , true_labels = [], []
            for step, batch in enumerate(all_loader[loader]):
                if loader == 'train':
                    model.train()
                else:
                    model.eval()
                batch = tuple(t.to(device) for t in batch)
                b_input_ids, b_labels ,b_input_mask = batch
                model.zero_grad()
                outputs = model(b_input_ids, token_type_ids=None,
                                attention_mask=b_input_mask, labels=b_labels)

                loss = outputs[0]
                loss.backward()
                total_loss += loss.item()

                if loader == 'train':
                    torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
                    optimizer.step()
                    scheduler.step()
                logits = outputs[1].detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()
                predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
                true_labels.extend(label_ids)

            avg_loss = total_loss / len(all_loader[loader])
            print(f"Average {loader} loss: {avg_loss}")
            pred_tags,valid_tags = [],[]
            for p, l in zip(predictions, true_labels):
                _p = []
                _l = []
                for p_i, l_i in zip(p, l):
                    if tag_values[l_i] != "PAD":
                        _p.append(tag_values[p_i])
                        _l.append(tag_values[l_i])
                pred_tags.append(_p)
                valid_tags.append(_l)
            print(f"{loader} F1-Score: {f1_score(valid_tags , pred_tags,scheme = IOB2)}")
            if loader == 'valid':
                Fold_score.append(f1_score(valid_tags , pred_tags,scheme = IOB2))
    if Fold_score != []:
        All_Fold_score.append(Fold_score)
#             if _ == (epochs - 1):
#                 print(classification_report(valid_tags, pred_tags,mode='strict',scheme = IOB2))

FOLD 0
--------------------------------


Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010300559680393352




train F1-Score: 0.6196611366043063
Average valid loss: 0.006457950304556122
valid F1-Score: 0.7840481565086531


Epoch:  10%|███████▌                                                                   | 1/10 [05:56<53:25, 356.17s/it]

Average train loss: 0.0036142855641541445
train F1-Score: 0.8290674780741005
Average valid loss: 0.006658595228652645
valid F1-Score: 0.8186046511627907


Epoch:  20%|███████████████                                                            | 2/10 [11:51<47:23, 355.39s/it]

Average train loss: 0.002001998961711518
train F1-Score: 0.9021092482422932
Average valid loss: 0.006626694098350256
valid F1-Score: 0.8407643312101911


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:46<41:27, 355.36s/it]

Average train loss: 0.0012338430284104557
train F1-Score: 0.9305404425099746
Average valid loss: 0.008645789705327677
valid F1-Score: 0.8202247191011236


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:42<35:33, 355.64s/it]

Average train loss: 0.0007405871852626792
train F1-Score: 0.9587272727272728
Average valid loss: 0.0075235866816864
valid F1-Score: 0.8365384615384616


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:36<29:35, 355.15s/it]

Average train loss: 0.0004726608055252245
train F1-Score: 0.970850792494079
Average valid loss: 0.009161177788280144
valid F1-Score: 0.8469468675654244


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:31<23:40, 355.01s/it]

Average train loss: 0.0002872426495619256
train F1-Score: 0.9829332846582092
Average valid loss: 0.012121113837340268
valid F1-Score: 0.8361581920903955


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [41:25<17:44, 354.77s/it]

Average train loss: 0.00021456576706982444
train F1-Score: 0.9891214919096809
Average valid loss: 0.011587941954109632
valid F1-Score: 0.8417721518987342


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [47:17<11:47, 353.95s/it]

Average train loss: 7.848835306257731e-05
train F1-Score: 0.9944246412576547
Average valid loss: 0.012812200043680258
valid F1-Score: 0.8492706645056727


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [53:09<05:53, 353.10s/it]

Average train loss: 5.801860001436856e-05
train F1-Score: 0.9957050169057844
Average valid loss: 0.012675779969884391
valid F1-Score: 0.8525382755842064


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [59:00<00:00, 354.05s/it]


FOLD 1
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010397590584645556
train F1-Score: 0.6329541414859581
Average valid loss: 0.007378790658137258
valid F1-Score: 0.7598253275109171


Epoch:  10%|███████▌                                                                   | 1/10 [05:50<52:34, 350.55s/it]

Average train loss: 0.0037667803834966506
train F1-Score: 0.8314526240372561
Average valid loss: 0.005380878076902321
valid F1-Score: 0.8161648177496038


Epoch:  20%|███████████████                                                            | 2/10 [11:41<46:48, 351.03s/it]

Average train loss: 0.002021219261033242
train F1-Score: 0.8933727916778764
Average valid loss: 0.006275581285326467
valid F1-Score: 0.8434712084347121


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:33<40:57, 351.08s/it]

Average train loss: 0.0013019763189155678
train F1-Score: 0.9328156041177533
Average valid loss: 0.008448480299079542
valid F1-Score: 0.859071729957806


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:24<35:06, 351.15s/it]

Average train loss: 0.0008083470246843099
train F1-Score: 0.9551101840935885
Average valid loss: 0.007771854958751386
valid F1-Score: 0.860738255033557


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:15<29:15, 351.04s/it]

Average train loss: 0.0005948000922331964
train F1-Score: 0.9673606691517411
Average valid loss: 0.007268929254173567
valid F1-Score: 0.858044164037855


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:06<23:25, 351.26s/it]

Average train loss: 0.0002949631860914208
train F1-Score: 0.9824081669856896
Average valid loss: 0.008036635509115108
valid F1-Score: 0.8677419354838709


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [40:57<17:33, 351.17s/it]

Average train loss: 0.00020702522031654954
train F1-Score: 0.988872674206494
Average valid loss: 0.00879899091595925
valid F1-Score: 0.8717532467532467


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:49<11:42, 351.23s/it]

Average train loss: 0.00012145452317706941
train F1-Score: 0.993335159317082
Average valid loss: 0.008899311330500112
valid F1-Score: 0.874074074074074


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:40<05:51, 351.12s/it]

Average train loss: 6.054653704617444e-05
train F1-Score: 0.9965284122053718
Average valid loss: 0.009260144201153384
valid F1-Score: 0.8760330578512397


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:31<00:00, 351.17s/it]


FOLD 2
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010364284250250589
train F1-Score: 0.6176109454481669
Average valid loss: 0.005584403088626904
valid F1-Score: 0.7915611814345992


Epoch:  10%|███████▌                                                                   | 1/10 [05:50<52:37, 350.84s/it]

Average train loss: 0.003929644336650913
train F1-Score: 0.8262835860601224
Average valid loss: 0.005202563185825998
valid F1-Score: 0.8338926174496643


Epoch:  20%|███████████████                                                            | 2/10 [11:41<46:47, 350.91s/it]

Average train loss: 0.0021240697954047717
train F1-Score: 0.8905734415410684
Average valid loss: 0.006479525819600251
valid F1-Score: 0.8216503992901507


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:33<40:57, 351.06s/it]

Average train loss: 0.0013162463003759085
train F1-Score: 0.9321698452680821
Average valid loss: 0.007594076147445369
valid F1-Score: 0.8551959114139693


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:24<35:06, 351.12s/it]

Average train loss: 0.0007841425381393714
train F1-Score: 0.9569931333574269
Average valid loss: 0.007144531250840339
valid F1-Score: 0.8482293423271501


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:16<29:16, 351.40s/it]

Average train loss: 0.0005026943687026957
train F1-Score: 0.9667570009033424
Average valid loss: 0.00860774533956971
valid F1-Score: 0.8559027777777778


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:06<23:24, 351.11s/it]

Average train loss: 0.0002907239883643496
train F1-Score: 0.9818577648766328
Average valid loss: 0.008801637264202963
valid F1-Score: 0.8629441624365483


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [40:57<17:33, 351.10s/it]

Average train loss: 0.00021214201633998038
train F1-Score: 0.9883868626383596
Average valid loss: 0.008654695179231685
valid F1-Score: 0.8502894954507857


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:49<11:42, 351.36s/it]

Average train loss: 0.00013337289028199462
train F1-Score: 0.9919215757465735
Average valid loss: 0.01027846563232089
valid F1-Score: 0.846938775510204


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:40<05:51, 351.31s/it]

Average train loss: 6.70596025851957e-05
train F1-Score: 0.9944580721359134
Average valid loss: 0.011055991952465523
valid F1-Score: 0.8564058469475494


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:32<00:00, 351.24s/it]


FOLD 3
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010462310315725866
train F1-Score: 0.6140969162995595
Average valid loss: 0.005620522901861038
valid F1-Score: 0.7370929308975377


Epoch:  10%|███████▌                                                                   | 1/10 [05:51<52:40, 351.12s/it]

Average train loss: 0.0037480456058713732
train F1-Score: 0.8271259807015962
Average valid loss: 0.00906884992205913
valid F1-Score: 0.7741027445460944


Epoch:  20%|███████████████                                                            | 2/10 [11:42<46:48, 351.08s/it]

Average train loss: 0.002055523306747189
train F1-Score: 0.8982513364138806
Average valid loss: 0.004912522275555679
valid F1-Score: 0.8453922315308454


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:33<40:59, 351.34s/it]

Average train loss: 0.0012799652176236852
train F1-Score: 0.9325086410769511
Average valid loss: 0.007102664044143146
valid F1-Score: 0.8526785714285715


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:25<35:08, 351.35s/it]

Average train loss: 0.0009245742661936412
train F1-Score: 0.9484235465646073
Average valid loss: 0.006604312358548236
valid F1-Score: 0.8622291021671826


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:16<29:15, 351.17s/it]

Average train loss: 0.0005475310849799777
train F1-Score: 0.9654542131237434
Average valid loss: 0.007297137575721644
valid F1-Score: 0.8760330578512396


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:07<23:24, 351.23s/it]

Average train loss: 0.000389921214560201
train F1-Score: 0.9770377824535723
Average valid loss: 0.006848287674217235
valid F1-Score: 0.8685015290519877


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [40:58<17:34, 351.35s/it]

Average train loss: 0.0002223329275170644
train F1-Score: 0.9855152181884855
Average valid loss: 0.00713570677732655
valid F1-Score: 0.8687643898695319


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:50<11:42, 351.27s/it]

Average train loss: 0.0001493504122863494
train F1-Score: 0.9918296153493068
Average valid loss: 0.008234415247438202
valid F1-Score: 0.8741418764302059


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:41<05:51, 351.27s/it]

Average train loss: 6.458036922568306e-05
train F1-Score: 0.9953172344137361
Average valid loss: 0.008499362747063562
valid F1-Score: 0.8825786646201074


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:32<00:00, 351.23s/it]


FOLD 4
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010619112649334234
train F1-Score: 0.5946446615021892
Average valid loss: 0.004563292189038189
valid F1-Score: 0.7955326460481099


Epoch:  10%|███████▌                                                                   | 1/10 [05:50<52:36, 350.75s/it]

Average train loss: 0.0036960342544249403
train F1-Score: 0.8281639928698752
Average valid loss: 0.004621115081400056
valid F1-Score: 0.8481421647819063


Epoch:  20%|███████████████                                                            | 2/10 [11:41<46:47, 350.99s/it]

Average train loss: 0.0020339947663116954
train F1-Score: 0.8984857987635516
Average valid loss: 0.00496720556330288
valid F1-Score: 0.8402270884022709


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:32<40:57, 351.03s/it]

Average train loss: 0.0013276759863208182
train F1-Score: 0.9320703653585927
Average valid loss: 0.005253398560863501
valid F1-Score: 0.8445199660152931


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:23<35:06, 351.01s/it]

Average train loss: 0.0007769265503547525
train F1-Score: 0.9543929640039895
Average valid loss: 0.006071394766035479
valid F1-Score: 0.8512396694214875


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:15<29:16, 351.23s/it]

Average train loss: 0.0004680271237892598
train F1-Score: 0.970737913486005
Average valid loss: 0.005937409699352395
valid F1-Score: 0.8594249201277955


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:07<23:26, 351.51s/it]

Average train loss: 0.0003239726494092296
train F1-Score: 0.9790833030192797
Average valid loss: 0.007806699837719307
valid F1-Score: 0.8532654792196777


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [40:59<17:34, 351.57s/it]

Average train loss: 0.00021146735855837544
train F1-Score: 0.9877893202114089
Average valid loss: 0.008864080263384628
valid F1-Score: 0.871186440677966


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:51<11:43, 351.90s/it]

Average train loss: 0.00012551743981712937
train F1-Score: 0.9905161407988328
Average valid loss: 0.007825994530810716
valid F1-Score: 0.8673894912427023


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:43<05:51, 351.82s/it]

Average train loss: 5.3947440391570335e-05
train F1-Score: 0.9967153284671534
Average valid loss: 0.009067950678184812
valid F1-Score: 0.8686192468619247


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:35<00:00, 351.51s/it]


FOLD 5
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010395915107545705
train F1-Score: 0.6322291853178155
Average valid loss: 0.005941001323224159
valid F1-Score: 0.8125502815768303


Epoch:  10%|███████▌                                                                   | 1/10 [05:50<52:38, 350.93s/it]

Average train loss: 0.003667508953174098
train F1-Score: 0.8299660289647774
Average valid loss: 0.006073782311379047
valid F1-Score: 0.8381877022653722


Epoch:  20%|███████████████                                                            | 2/10 [11:43<46:53, 351.72s/it]

Average train loss: 0.0021246480210243145
train F1-Score: 0.893781452192667
Average valid loss: 0.006322710444968178
valid F1-Score: 0.8361669242658424


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:34<41:00, 351.49s/it]

Average train loss: 0.0013106793228557642
train F1-Score: 0.9301989150090415
Average valid loss: 0.006327952287803242
valid F1-Score: 0.8585055643879171


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:26<35:10, 351.71s/it]

Average train loss: 0.0008418054574466358
train F1-Score: 0.9538433581682718
Average valid loss: 0.007627752842892788
valid F1-Score: 0.8421900161030595


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:18<29:18, 351.66s/it]

Average train loss: 0.0005404249796405928
train F1-Score: 0.968952016753164
Average valid loss: 0.008543290333444125
valid F1-Score: 0.8487886382623224


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:10<23:27, 351.83s/it]

Average train loss: 0.0003839759596555331
train F1-Score: 0.977830489918803
Average valid loss: 0.007930987227566478
valid F1-Score: 0.8426073131955484


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [41:01<17:34, 351.65s/it]

Average train loss: 0.00017883750913700252
train F1-Score: 0.9880441726750023
Average valid loss: 0.007962270516876353
valid F1-Score: 0.8535031847133758


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:53<11:43, 351.71s/it]

Average train loss: 0.000104702116484436
train F1-Score: 0.9933315063487714
Average valid loss: 0.0106459215451173
valid F1-Score: 0.8621794871794873


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:44<05:51, 351.61s/it]

Average train loss: 6.455916138561432e-05
train F1-Score: 0.9963443611771157
Average valid loss: 0.011066682064948942
valid F1-Score: 0.8647773279352227


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:35<00:00, 351.56s/it]


FOLD 6
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.009916022862760155
train F1-Score: 0.632951823263242
Average valid loss: 0.005738414811655149
valid F1-Score: 0.7769667477696673


Epoch:  10%|███████▌                                                                   | 1/10 [05:50<52:38, 350.96s/it]

Average train loss: 0.003717360259605315
train F1-Score: 0.8212758374910906
Average valid loss: 0.005452642307225803
valid F1-Score: 0.8029315960912052


Epoch:  20%|███████████████                                                            | 2/10 [11:42<46:50, 351.33s/it]

Average train loss: 0.0021156910863144728
train F1-Score: 0.895551776604314
Average valid loss: 0.005406796645912805
valid F1-Score: 0.8255250403877221


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:34<41:00, 351.45s/it]

Average train loss: 0.0012913126850735623
train F1-Score: 0.9289321789321789
Average valid loss: 0.006597456197971736
valid F1-Score: 0.8432168968318441


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:25<35:09, 351.51s/it]

Average train loss: 0.0008554321236798477
train F1-Score: 0.9527331769799747
Average valid loss: 0.0072709152881782115
valid F1-Score: 0.8602150537634409


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:17<29:18, 351.76s/it]

Average train loss: 0.00046254875116072495
train F1-Score: 0.967765302426657
Average valid loss: 0.00860596512928912
valid F1-Score: 0.8205128205128206


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:09<23:26, 351.55s/it]

Average train loss: 0.0002968586418750157
train F1-Score: 0.9803850345078096
Average valid loss: 0.008717319458121603
valid F1-Score: 0.8380952380952381


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [41:00<17:34, 351.55s/it]

Average train loss: 0.00020588246405930336
train F1-Score: 0.985183165166803
Average valid loss: 0.009701195071648855
valid F1-Score: 0.8576186511240632


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:52<11:43, 351.51s/it]

Average train loss: 0.00010527408333197412
train F1-Score: 0.9925359548516293
Average valid loss: 0.011616326019433056
valid F1-Score: 0.8592592592592594


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:44<05:51, 351.69s/it]

Average train loss: 6.463680647399269e-05
train F1-Score: 0.9954504094631483
Average valid loss: 0.01161068773418761
valid F1-Score: 0.8576131687242798


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:35<00:00, 351.54s/it]


FOLD 7
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.01094859034951836
train F1-Score: 0.6111013832953948
Average valid loss: 0.005073002643444933
valid F1-Score: 0.8253452477660438


Epoch:  10%|███████▌                                                                   | 1/10 [05:51<52:41, 351.28s/it]

Average train loss: 0.003614189915857946
train F1-Score: 0.8318536931818181
Average valid loss: 0.004698307252133854
valid F1-Score: 0.8376623376623376


Epoch:  20%|███████████████                                                            | 2/10 [11:42<46:50, 351.37s/it]

Average train loss: 0.0019407510158849165
train F1-Score: 0.8997226943375972
Average valid loss: 0.005683108977955569
valid F1-Score: 0.8482220294882914


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:34<40:59, 351.36s/it]

Average train loss: 0.0013121395975280367
train F1-Score: 0.9281221922731356
Average valid loss: 0.005795829508378731
valid F1-Score: 0.8731914893617021


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:25<35:07, 351.33s/it]

Average train loss: 0.0007489929644483537
train F1-Score: 0.9561537768974522
Average valid loss: 0.00739739618034814
valid F1-Score: 0.8640533778148458


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:16<29:17, 351.41s/it]

Average train loss: 0.00048310654181766297
train F1-Score: 0.9681620839363242
Average valid loss: 0.008850167695277219
valid F1-Score: 0.8521739130434781


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:07<23:25, 351.29s/it]

Average train loss: 0.0003375704894201808
train F1-Score: 0.9797174936617168
Average valid loss: 0.00878605346895815
valid F1-Score: 0.8507718696397941


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [40:59<17:33, 351.33s/it]

Average train loss: 0.00014753248729570655
train F1-Score: 0.9907407407407406
Average valid loss: 0.009280496075046654
valid F1-Score: 0.8711036225779274


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:50<11:42, 351.24s/it]

Average train loss: 8.982187641564812e-05
train F1-Score: 0.9925643815741748
Average valid loss: 0.010532027171329134
valid F1-Score: 0.8729472774416595


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:41<05:51, 351.20s/it]

Average train loss: 6.713345040867719e-05
train F1-Score: 0.9960021806287479
Average valid loss: 0.009682968874734604
valid F1-Score: 0.8718830610490111


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:32<00:00, 351.30s/it]


FOLD 8
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010254553572554113
train F1-Score: 0.635567987007128
Average valid loss: 0.006625874772852612
valid F1-Score: 0.772795216741405


Epoch:  10%|███████▌                                                                   | 1/10 [05:51<52:47, 351.98s/it]

Average train loss: 0.0038184477439921834
train F1-Score: 0.8329150233058444
Average valid loss: 0.006247858544589153
valid F1-Score: 0.7944142746314973


Epoch:  20%|███████████████                                                            | 2/10 [11:43<46:51, 351.42s/it]

Average train loss: 0.0020342088391227075
train F1-Score: 0.9009237456982431
Average valid loss: 0.006416745282685249
valid F1-Score: 0.8143621084797554


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:34<40:59, 351.35s/it]

Average train loss: 0.0011900924995240294
train F1-Score: 0.9341687097067102
Average valid loss: 0.008187172824517456
valid F1-Score: 0.8352490421455939


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:26<35:10, 351.81s/it]

Average train loss: 0.0007972500707827399
train F1-Score: 0.9535964992250888
Average valid loss: 0.0071612350862055715
valid F1-Score: 0.8301599390708302


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:18<29:18, 351.67s/it]

Average train loss: 0.0004875239610817754
train F1-Score: 0.9685075308078502
Average valid loss: 0.008352269258274586
valid F1-Score: 0.8448275862068966


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:09<23:26, 351.71s/it]

Average train loss: 0.0003219292405914935
train F1-Score: 0.9804065189525728
Average valid loss: 0.007770774315853417
valid F1-Score: 0.8294515401953418


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [41:02<17:35, 351.81s/it]

Average train loss: 0.00019083096680407705
train F1-Score: 0.9868891537544696
Average valid loss: 0.009139169438742876
valid F1-Score: 0.8522283033620015


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [46:53<11:43, 351.78s/it]

Average train loss: 0.00010250043505302605
train F1-Score: 0.9927502982472239
Average valid loss: 0.010859682424085247
valid F1-Score: 0.8608562691131498


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [52:45<05:51, 351.73s/it]

Average train loss: 8.110883241545672e-05
train F1-Score: 0.9937637564196625
Average valid loss: 0.009885227958184434
valid F1-Score: 0.8562691131498471


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [58:36<00:00, 351.68s/it]


FOLD 9
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.010585855822860332
train F1-Score: 0.624122191011236
Average valid loss: 0.009245978209452619
valid F1-Score: 0.6301824212271974


Epoch:  10%|███████▌                                                                   | 1/10 [05:51<52:46, 351.86s/it]

Average train loss: 0.0038767488104833143
train F1-Score: 0.8328767123287671
Average valid loss: 0.006184263521325712
valid F1-Score: 0.817792985457656


Epoch:  20%|███████████████                                                            | 2/10 [11:43<46:52, 351.62s/it]

Average train loss: 0.0020666174549630054
train F1-Score: 0.8927587738782763
Average valid loss: 0.0058549825464870595
valid F1-Score: 0.8156521739130436


Epoch:  30%|██████████████████████▌                                                    | 3/10 [17:34<41:01, 351.61s/it]

Average train loss: 0.0013788674585255428
train F1-Score: 0.92727599209912
Average valid loss: 0.007350436896173686
valid F1-Score: 0.8181818181818182


Epoch:  40%|██████████████████████████████                                             | 4/10 [23:27<35:10, 351.81s/it]

Average train loss: 0.0008291945039487536
train F1-Score: 0.9522439068261535
Average valid loss: 0.0075223763666332245
valid F1-Score: 0.8438893844781445


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [29:27<29:33, 354.77s/it]

Average train loss: 0.0005494420610996219
train F1-Score: 0.9689530685920578
Average valid loss: 0.00956507042250247
valid F1-Score: 0.8280254777070064


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [35:37<23:59, 359.95s/it]

Average train loss: 0.00033341251238605004
train F1-Score: 0.97590470174172
Average valid loss: 0.010470236516970762
valid F1-Score: 0.813126709206928


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [41:47<18:09, 363.23s/it]

Average train loss: 0.00020080217085274998
train F1-Score: 0.9854338188727042
Average valid loss: 0.01153930407855195
valid F1-Score: 0.8354430379746836


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [47:53<12:08, 364.38s/it]

Average train loss: 0.00011826303393007498
train F1-Score: 0.9923899257111796
Average valid loss: 0.010541966982189488
valid F1-Score: 0.8336314847942754


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [54:00<06:04, 364.96s/it]

Average train loss: 5.468315700827773e-05
train F1-Score: 0.997009515178976
Average valid loss: 0.01181853263300679
valid F1-Score: 0.8325952170062001


Epoch: 100%|████████████████████████████████████████████████████████████████████████| 10/10 [1:00:05<00:00, 360.58s/it]


In [23]:
All_score = len(All_Fold_score[0]) * [0]
for score in All_Fold_score:
    All_score = np.sum([All_score,score], axis = 0)
All_score = np.round((All_score / 10),2)     

In [24]:
All_score

array([0.77, 0.82, 0.83, 0.85, 0.85, 0.85, 0.85, 0.86, 0.86, 0.86])

## 單用 trainingset 去做 10-fold CV

In [25]:
kfold = KFold(n_splits=k_folds, shuffle=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bs = 64
epochs = 10
max_grad_norm = 3.0

tr_dataset = BertNerDataset(input_ids,tags,tokenizer.convert_tokens_to_ids('[PAD]'),tag2idx['PAD'])

In [None]:
All_Fold_score = []
for fold, (train_ids, test_ids) in enumerate(kfold.split(tr_dataset)):
    print(f'FOLD {fold}')
    print('--------------------------------')
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
    train_dataloader = DataLoader(dataset, batch_size=bs,
                                  collate_fn=tr_dataset.collate_fn,
                                  sampler=train_subsampler)
    valid_dataloader = DataLoader(dataset, batch_size=bs,
                                  collate_fn=va_dataset.collate_fn,
                                  sampler=test_subsampler)

    total_steps = len(train_dataloader) * epochs
    model = BertForTokenClassification.from_pretrained(
        model_ty,
        num_labels=len(tag2idx),
        output_attentions = False,
        output_hidden_states = False
    )
    optimizer = AdamW(model.parameters(),lr=3e-5,eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )
    model.cuda()
    all_loader = {
    "train" : train_dataloader,
    "valid" : valid_dataloader,
    }
    Fold_score = []
    for _ in trange(epochs, desc="Epoch"):
        for loader in all_loader:
            total_loss = 0
            # Training loop
            predictions , true_labels = [], []
            for step, batch in enumerate(all_loader[loader]):
                if loader == 'train':
                    model.train()
                else:
                    model.eval()
                batch = tuple(t.to(device) for t in batch)
                b_input_ids, b_labels ,b_input_mask = batch
                model.zero_grad()
                outputs = model(b_input_ids, token_type_ids=None,
                                attention_mask=b_input_mask, labels=b_labels)

                loss = outputs[0]
                loss.backward()
                total_loss += loss.item()

                if loader == 'train':
                    torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
                    optimizer.step()
                    scheduler.step()
                logits = outputs[1].detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()
                predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
                true_labels.extend(label_ids)

            avg_loss = total_loss / len(all_loader[loader])
            print(f"Average {loader} loss: {avg_loss}")
            pred_tags,valid_tags = [],[]
            for p, l in zip(predictions, true_labels):
                _p = []
                _l = []
                for p_i, l_i in zip(p, l):
                    if tag_values[l_i] != "PAD":
                        _p.append(tag_values[p_i])
                        _l.append(tag_values[l_i])
                pred_tags.append(_p)
                valid_tags.append(_l)
            print(f"{loader} F1-Score: {f1_score(valid_tags , pred_tags,scheme = IOB2)}")
            if loader == 'valid':
                Fold_score.append(f1_score(valid_tags , pred_tags,scheme = IOB2))
    if Fold_score != []:
        All_Fold_score.append(Fold_score)
#             if _ == (epochs - 1):
#                 print(classification_report(valid_tags, pred_tags,mode='strict',scheme = IOB2))

FOLD 0
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.015133075965451798
train F1-Score: 0.6078832636355549
Average valid loss: 0.006847840949128361
valid F1-Score: 0.7986171132238549


Epoch:  10%|███████▌                                                                   | 1/10 [04:14<38:13, 254.80s/it]

Average train loss: 0.004556785061317847
train F1-Score: 0.8348516280328312
Average valid loss: 0.0068908819337759105
valid F1-Score: 0.8116938950988822


Epoch:  20%|███████████████                                                            | 2/10 [09:09<37:07, 278.48s/it]

Average train loss: 0.002645198684861199
train F1-Score: 0.8933200398803589
Average valid loss: 0.008843207057789305
valid F1-Score: 0.8109028960817717


Epoch:  30%|██████████████████████▌                                                    | 3/10 [14:05<33:24, 286.39s/it]

Average train loss: 0.0015399439633459976
train F1-Score: 0.9380433787864922
Average valid loss: 0.007930591535285487
valid F1-Score: 0.8378839590443686


Epoch:  40%|██████████████████████████████                                             | 4/10 [19:00<28:59, 289.85s/it]

Average train loss: 0.0008924153075964398
train F1-Score: 0.9584787800845123
Average valid loss: 0.010086697926703301
valid F1-Score: 0.8514680483592401


Epoch:  50%|█████████████████████████████████████▌                                     | 5/10 [23:41<23:53, 286.64s/it]

Average train loss: 0.0005404584471824448
train F1-Score: 0.9739865796488648
Average valid loss: 0.010505727719943295
valid F1-Score: 0.8452173913043478


Epoch:  60%|█████████████████████████████████████████████                              | 6/10 [27:59<18:27, 276.83s/it]

Average train loss: 0.00038990858112814393
train F1-Score: 0.9811981566820277
Average valid loss: 0.011425018871397265
valid F1-Score: 0.8467670504871568


Epoch:  70%|████████████████████████████████████████████████████▌                      | 7/10 [32:47<14:01, 280.48s/it]

Average train loss: 0.00017431509045933054
train F1-Score: 0.9886479003230272
Average valid loss: 0.01313114168273573
valid F1-Score: 0.8476357267950962


Epoch:  80%|████████████████████████████████████████████████████████████               | 8/10 [37:38<09:27, 283.92s/it]

Average train loss: 0.0001395025618654761
train F1-Score: 0.9927074679220899
Average valid loss: 0.013536095514989957
valid F1-Score: 0.8581375108790252


Epoch:  90%|███████████████████████████████████████████████████████████████████▌       | 9/10 [42:25<04:44, 284.66s/it]

Average train loss: 5.960426894476368e-05
train F1-Score: 0.9957509698873084
Average valid loss: 0.014084202973360153
valid F1-Score: 0.8524590163934426


Epoch: 100%|██████████████████████████████████████████████████████████████████████████| 10/10 [47:28<00:00, 284.87s/it]


FOLD 1
--------------------------------


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Average train loss: 0.015206943297377722
train F1-Score: 0.6008303153431676
Average valid loss: 0.006871790106931978
valid F1-Score: 0.8110367892976589


Epoch:  10%|███████▌                                                                   | 1/10 [05:05<45:47, 305.24s/it]

Average train loss: 0.0045988371938012395
train F1-Score: 0.829497247043957
Average valid loss: 0.005412186552064951
valid F1-Score: 0.8387096774193549


Epoch:  20%|███████████████                                                            | 2/10 [09:00<35:12, 264.06s/it]

Average train loss: 0.002301824189832229
train F1-Score: 0.9029320706610817
Average valid loss: 0.006375025175763661
valid F1-Score: 0.8595317725752508


Epoch:  30%|██████████████████████▌                                                    | 3/10 [12:52<29:05, 249.36s/it]

In [None]:
All_score = len(All_Fold_score[0]) * [0]
for score in All_Fold_score:
    All_score = np.sum([All_score,score], axis = 0)
All_score = np.round((All_score / 10) , 2)
All_score

## 全部的 trainingset 訓練 model 後, 透過 validset 評估 model

In [None]:
model = BertForTokenClassification.from_pretrained(
    model_ty,
    num_labels=len(tag2idx),
    output_attentions = False,
    output_hidden_states = False
)
optimizer = AdamW(model.parameters(),lr=3e-5,eps=1e-8)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
    )
model.cuda()
train_dataloader = DataLoader(tr_dataset, batch_size=bs,
                            collate_fn=tr_dataset.collate_fn)
valid_dataloader = DataLoader(va_dataset, batch_size=bs,
                            collate_fn=va_dataset.collate_fn)
all_loader = {
    "train" : train_dataloader,
    "valid" : valid_dataloader,
}

In [None]:
F_Score = []
for _ in trange(epochs, desc="Epoch"):
    for loader in all_loader:
        total_loss = 0
        # Training loop
        predictions , true_labels = [], []
        for step, batch in enumerate(all_loader[loader]):
            if loader == 'train':
                model.train()
            else:
                model.eval()
            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_labels ,b_input_mask = batch
            model.zero_grad()
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask, labels=b_labels)

            loss = outputs[0]
            loss.backward()
            total_loss += loss.item()

            if loader == 'train':
                torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
                optimizer.step()
                scheduler.step()
            logits = outputs[1].detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
            true_labels.extend(label_ids)

        avg_loss = total_loss / len(all_loader[loader])
        print(f"Average {loader} loss: {avg_loss}")
        pred_tags,valid_tags = [],[]
        for p, l in zip(predictions, true_labels):
            _p = []
            _l = []
            for p_i, l_i in zip(p, l):
                if tag_values[l_i] != "PAD":
                    _p.append(tag_values[p_i])
                    _l.append(tag_values[l_i])
            pred_tags.append(_p)
            valid_tags.append(_l)
        print(f"{loader} F1-Score: {f1_score(valid_tags , pred_tags,scheme = IOB2)}")
        if loader == 'valid':
            F_Score.append(f1_score(valid_tags , pred_tags,scheme = IOB2))

In [None]:
np.round(np.array(F_Score),2)