In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertConfig

from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

torch.__version__

import transformers
from transformers import BertForTokenClassification, AdamW

from transformers import get_linear_schedule_with_warmup
transformers.__version__


from seqeval.metrics import f1_score, accuracy_score

In [2]:
class SentenceGetter(object):

    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w, p, t) for w, p, t in zip(s["word"].values.tolist(),
                                                           s["pos"].values.tolist(),
                                                           s["NER"].values.tolist())]
        self.grouped = self.data.groupby("sentence").apply(agg_func)
        self.sentences = [s for s in self.grouped]

    def get_next(self):
        try:
            s = self.grouped["Sentence: {}".format(self.n_sent)]
            self.n_sent += 1
            return s
        except:
            return None

In [3]:
path_ = r"C:\Users\LAB\jupyter\DeepLearning_project\data\NER\{}.xlsx"

buds_live = pd.read_excel(path_.format("buds_live"), encoding="UTF-8").fillna(method="ffill")
buds_live.word=buds_live.word.astype(str)

buds_live.head()

Unnamed: 0.1,Unnamed: 0,word,rule_label,IOB,sentence,enter,review,pos,NER
0,0,2020,,O,0,0,0,Number,O
1,1,.,,O,0,0,0,Punctuation,O
2,2,08,,O,0,0,0,Number,O
3,3,.,,O,0,0,0,Punctuation,O
4,4,07,,O,0,0,0,Number,O


In [4]:
getter = SentenceGetter(buds_live)
getter.sentences[0]

[('2020', 'Number', 'O'),
 ('.', 'Punctuation', 'O'),
 ('08', 'Number', 'O'),
 ('.', 'Punctuation', 'O'),
 ('07', 'Number', 'O'),
 ('로켓', 'Noun', 'O'),
 ('와우', 'Noun', 'O'),
 ('아침', 'Noun', 'O'),
 ('6시', 'Number', 'O'),
 ('에', 'Foreign', 'O'),
 ('수령', 'Noun', 'O'),
 ('받다', 'Verb', 'O'),
 ('뒤', 'Noun', 'O'),
 ('페어', 'Noun', 'O'),
 ('링후', 'Noun', 'O'),
 ('약', 'Noun', 'O'),
 ('15분', 'Number', 'O'),
 ('간', 'Foreign', 'O'),
 ('사용', 'Noun', 'O'),
 ('후', 'Noun', 'O'),
 ('후기', 'Noun', 'O'),
 ('올리다', 'Verb', 'O'),
 ('.', 'Punctuation', 'O')]

In [5]:
sentences = [[word[0] for word in sentence] for sentence in getter.sentences]
sentences[0]

['2020',
 '.',
 '08',
 '.',
 '07',
 '로켓',
 '와우',
 '아침',
 '6시',
 '에',
 '수령',
 '받다',
 '뒤',
 '페어',
 '링후',
 '약',
 '15분',
 '간',
 '사용',
 '후',
 '후기',
 '올리다',
 '.']

In [6]:
labels = [[s[2] for s in sentence] for sentence in getter.sentences]
print(labels[0])

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']


In [9]:
tag_values = list(set(buds_live["NER"].values))
tag_values.append("PAD")
tag2idx = {t: i for i, t in enumerate(tag_values)}

In [10]:
MAX_LEN = 70
bs = 32

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

In [12]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [13]:
def tokenize_and_preserve_labels(sentence, text_labels):
    tokenized_sentence = []
    labels = []

    for word, label in zip(sentence, text_labels):

        # Tokenize the word and count # of subwords the word is broken into
        tokenized_word = tokenizer.tokenize(word)
        n_subwords = len(tokenized_word)

        # Add the tokenized word to the final tokenized word list
        tokenized_sentence.extend(tokenized_word)

        # Add the same label to the new list of labels `n_subwords` times
        labels.extend([label] * n_subwords)

    return tokenized_sentence, labels

In [14]:
tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(sentences, labels)
]

In [15]:
tokenized_texts = [token_label_pair[0] for token_label_pair in tokenized_texts_and_labels]
labels = [token_label_pair[1] for token_label_pair in tokenized_texts_and_labels]

In [16]:
input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", value=0.0,
                          truncating="post", padding="post")

In [17]:
for labels_ in labels:
    for label_ in labels_:
        if type(label_) == "NoneType":
            print(label_)

In [18]:
tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels],
                     maxlen=MAX_LEN, value=tag2idx["PAD"], padding="post",
                     dtype="long", truncating="post")

In [19]:
attention_masks = [[float(i != 0.0) for i in ii] for ii in input_ids]

In [20]:
tr_inputs, val_inputs, tr_tags, val_tags = train_test_split(input_ids, tags,
                                                            random_state=2018, test_size=0.1)
tr_masks, val_masks, _, _ = train_test_split(attention_masks, input_ids,
                                             random_state=2018, test_size=0.1)

In [21]:
tr_inputs = torch.tensor(tr_inputs).to(torch.int64)
val_inputs = torch.tensor(val_inputs).to(torch.int64)
tr_tags = torch.tensor(tr_tags).to(torch.int64)
val_tags = torch.tensor(val_tags).to(torch.int64)
tr_masks = torch.tensor(tr_masks).to(torch.int64)
val_masks = torch.tensor(val_masks).to(torch.int64)

In [22]:
train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=bs)

valid_data = TensorDataset(val_inputs, val_masks, val_tags)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=bs)

In [23]:
model = BertForTokenClassification.from_pretrained(
    "bert-base-cased",
    num_labels=len(tag2idx),
    output_attentions = False,
    output_hidden_states = False
)

Downloading:   0%|          | 0.00/433 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

In [24]:
FULL_FINETUNING = True
if FULL_FINETUNING:
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
else:
    param_optimizer = list(model.classifier.named_parameters())
    optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]

optimizer = AdamW(
    optimizer_grouped_parameters,
    lr=3e-5,
    eps=1e-8
)

In [28]:
epochs = 30
max_grad_norm = 1.0

# Total number of training steps is number of batches * number of epochs.
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)


In [29]:
## Store the average loss after each epoch so we can plot them.
loss_values, validation_loss_values = [], []

for _ in trange(epochs, desc="Epoch"):
    # ========================================
    #               Training
    # ========================================
    # Perform one full pass over the training set.


    model.train()
    # 각 epoch마다 Reset.
    total_loss = 0

    # Training loop
    for step, batch in enumerate(train_dataloader):
    
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        model.zero_grad()
        # forward pass
        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask, labels=b_labels)
        # get the loss
        loss = outputs[0]
        # Perform a backward pass to calculate the gradients.
        loss.backward()
        total_loss += loss.item()
        # Clip the norm of the gradient
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        # parameter 업데이트
        optimizer.step()
        # learning rate 업데이트
        scheduler.step()

    # 평균 loss 계산
    avg_train_loss = total_loss / len(train_dataloader)
    print("Average train loss: {}".format(avg_train_loss))

    # loss value 저장
    loss_values.append(avg_train_loss)


    # ========================================
    #               Validation
    # ========================================

    # evaluation mode로 전환
    model.eval()
    # Reset epoch.
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions , true_labels = [], []
    for batch in valid_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        # 계산할 모델이 있거나, Gredient 의 상황 알리기,
        with torch.no_grad():
            # Forward pass에는 logit predictions.
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask, labels=b_labels)
        #  CPU로 이동
        logits = outputs[1].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # test sentences accuracy .
        eval_loss += outputs[0].mean().item()
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        true_labels.extend(label_ids)

    eval_loss = eval_loss / len(valid_dataloader)
    validation_loss_values.append(eval_loss)
    print("Validation loss: {}".format(eval_loss))
    pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels)
                                 for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
    valid_tags = [tag_values[l_i] for l in true_labels
                                  for l_i in l if tag_values[l_i] != "PAD"]
    print("Validation Accuracy: {}".format(accuracy_score(pred_tags, valid_tags)))
    print("Validation F1-Score: {}".format(f1_score([pred_tags], [valid_tags])))
    print()

Epoch:   0%|                                                                                    | 0/30 [00:00<?, ?it/s]

Average train loss: 0.6135273851266428


Epoch:   3%|██▎                                                                    | 1/30 [25:09<12:09:21, 1509.00s/it]

Validation loss: 0.6627909583704812
Validation Accuracy: 0.8733962720890825
Validation F1-Score: 0.02185792349726776

Average train loss: 0.6090371934305719


Epoch:   7%|████▋                                                                  | 2/30 [46:51<10:47:31, 1387.55s/it]

Validation loss: 0.6677754925830024
Validation Accuracy: 0.8748487049140644
Validation F1-Score: 0.02203856749311295

Average train loss: 0.6043281798102275


Epoch:  10%|███████                                                               | 3/30 [1:03:49<9:08:33, 1219.01s/it]

Validation loss: 0.6690826139279774
Validation Accuracy: 0.8755749213265553
Validation F1-Score: 0.027624309392265192

Average train loss: 0.5966842505110412


Epoch:  13%|█████████▎                                                            | 4/30 [1:20:46<8:13:36, 1139.09s/it]

Validation loss: 0.6730221148048129
Validation Accuracy: 0.8760590656015492
Validation F1-Score: 0.033426183844011144

Average train loss: 0.5877877809420353


Epoch:  17%|███████████▋                                                          | 5/30 [1:37:49<7:37:14, 1097.38s/it]

Validation loss: 0.6749592444726399
Validation Accuracy: 0.8743645606390704
Validation F1-Score: 0.032

Average train loss: 0.5782547906166365


Epoch:  20%|██████████████                                                        | 6/30 [1:54:50<7:08:28, 1071.17s/it]

Validation loss: 0.6662779769727162
Validation Accuracy: 0.8743645606390704
Validation F1-Score: 0.03183023872679045

Average train loss: 0.57117240393863


Epoch:  23%|████████████████▎                                                     | 7/30 [2:11:55<6:44:50, 1056.09s/it]

Validation loss: 0.668658903666905
Validation Accuracy: 0.8753328491890583
Validation F1-Score: 0.032171581769437

Average train loss: 0.5631743904923191


Epoch:  27%|██████████████████▋                                                   | 8/30 [2:30:39<6:35:08, 1077.67s/it]

Validation loss: 0.6755708754062653
Validation Accuracy: 0.8755749213265553
Validation F1-Score: 0.032171581769437

Average train loss: 0.5544634761930514


Epoch:  30%|█████████████████████                                                 | 9/30 [2:48:06<6:13:50, 1068.10s/it]

Validation loss: 0.678057889853205
Validation Accuracy: 0.8758169934640523
Validation F1-Score: 0.03252032520325203

Average train loss: 0.5480209992212408


Epoch:  33%|███████████████████████                                              | 10/30 [3:05:25<5:53:01, 1059.08s/it]

Validation loss: 0.672495920743261
Validation Accuracy: 0.8736383442265795
Validation F1-Score: 0.04712041884816754

Average train loss: 0.5401341088679659


Epoch:  37%|█████████████████████████▎                                           | 11/30 [3:22:45<5:33:31, 1053.25s/it]

Validation loss: 0.6714408376387188
Validation Accuracy: 0.8753328491890583
Validation F1-Score: 0.0427807486631016

Average train loss: 0.5344989009264136


Epoch:  40%|███████████████████████████▌                                         | 12/30 [3:40:10<5:15:15, 1050.87s/it]

Validation loss: 0.6781922876834869
Validation Accuracy: 0.8724279835390947
Validation F1-Score: 0.046153846153846156

Average train loss: 0.5280968511805815


Epoch:  43%|█████████████████████████████▉                                       | 13/30 [3:57:45<4:58:06, 1052.17s/it]

Validation loss: 0.6833847377981458
Validation Accuracy: 0.8712176228516098
Validation F1-Score: 0.03655352480417755

Average train loss: 0.5198602839177396


Epoch:  47%|████████████████████████████████▏                                    | 14/30 [4:15:04<4:39:31, 1048.23s/it]

Validation loss: 0.6797747101102557
Validation Accuracy: 0.8733962720890825
Validation F1-Score: 0.04651162790697675

Average train loss: 0.5175337676240617


Epoch:  50%|██████████████████████████████████▌                                  | 15/30 [4:32:30<4:21:50, 1047.40s/it]

Validation loss: 0.696170877133097
Validation Accuracy: 0.86879690147664
Validation F1-Score: 0.04081632653061224

Average train loss: 0.5079863695036463


Epoch:  53%|████████████████████████████████████▊                                | 16/30 [4:49:53<4:04:06, 1046.18s/it]

Validation loss: 0.7028906430516925
Validation Accuracy: 0.8714596949891068
Validation F1-Score: 0.03664921465968587

Average train loss: 0.502802061183112


Epoch:  57%|███████████████████████████████████████                              | 17/30 [5:06:57<3:45:12, 1039.40s/it]

Validation loss: 0.7060786890132087
Validation Accuracy: 0.8625030259017187
Validation F1-Score: 0.04694835680751174

Average train loss: 0.4955678637288198


Epoch:  60%|█████████████████████████████████████████▍                           | 18/30 [5:24:02<3:27:01, 1035.13s/it]

Validation loss: 0.7113442676407951
Validation Accuracy: 0.8649237472766884
Validation F1-Score: 0.03931203931203931

Average train loss: 0.4905479202250473


Epoch:  63%|███████████████████████████████████████████▋                         | 19/30 [5:41:14<3:09:35, 1034.16s/it]

Validation loss: 0.7106467847313199
Validation Accuracy: 0.8678286129266521
Validation F1-Score: 0.034999999999999996

Average train loss: 0.4862375667616099


Epoch:  67%|██████████████████████████████████████████████                       | 20/30 [5:58:19<2:51:53, 1031.33s/it]

Validation loss: 0.7202487311192921
Validation Accuracy: 0.8654078915516824
Validation F1-Score: 0.04761904761904762

Average train loss: 0.48282389004691306


Epoch:  70%|████████████████████████████████████████████████▎                    | 21/30 [6:15:21<2:34:17, 1028.57s/it]

Validation loss: 0.7180486129862922
Validation Accuracy: 0.8656499636891793
Validation F1-Score: 0.04842615012106538

Average train loss: 0.47683957319299713


Epoch:  73%|██████████████████████████████████████████████████▌                  | 22/30 [6:32:23<2:16:54, 1026.79s/it]

Validation loss: 0.7137645993913923
Validation Accuracy: 0.8685548293391431
Validation F1-Score: 0.04975124378109453

Average train loss: 0.4748135438235868


Epoch:  77%|████████████████████████████████████████████████████▉                | 23/30 [6:49:14<1:59:13, 1021.91s/it]

Validation loss: 0.7224998878581184
Validation Accuracy: 0.8651658194141855
Validation F1-Score: 0.05755395683453237

Average train loss: 0.4707137635525535


Epoch:  80%|███████████████████████████████████████████████████████▏             | 24/30 [7:06:01<1:41:43, 1017.33s/it]

Validation loss: 0.7238314322062901
Validation Accuracy: 0.8632292423142096
Validation F1-Score: 0.047058823529411764

Average train loss: 0.46636751820059386


Epoch:  83%|█████████████████████████████████████████████████████████▌           | 25/30 [7:23:02<1:24:53, 1018.61s/it]

Validation loss: 0.7271541442189898
Validation Accuracy: 0.8646816751391915
Validation F1-Score: 0.043165467625899276

Average train loss: 0.4616482225285859


Epoch:  87%|███████████████████████████████████████████████████████████▊         | 26/30 [7:40:02<1:07:55, 1018.89s/it]

Validation loss: 0.7342226483992168
Validation Accuracy: 0.8641975308641975
Validation F1-Score: 0.047393364928909956

Average train loss: 0.4589640603345983


Epoch:  90%|███████████████████████████████████████████████████████████████▉       | 27/30 [7:56:59<50:55, 1018.44s/it]

Validation loss: 0.7358960807323456
Validation Accuracy: 0.8632292423142096
Validation F1-Score: 0.047505938242280284

Average train loss: 0.4557717129462907


Epoch:  93%|██████████████████████████████████████████████████████████████████▎    | 28/30 [8:14:02<33:59, 1019.79s/it]

Validation loss: 0.7396984483514514
Validation Accuracy: 0.8629871701767127
Validation F1-Score: 0.04716981132075472

Average train loss: 0.45522958516072826


Epoch:  97%|████████████████████████████████████████████████████████████████████▋  | 29/30 [8:31:10<17:02, 1022.10s/it]

Validation loss: 0.7447442476238523
Validation Accuracy: 0.8617768094892277
Validation F1-Score: 0.046403712296983764

Average train loss: 0.4534198239821346


Epoch: 100%|███████████████████████████████████████████████████████████████████████| 30/30 [8:49:29<00:00, 1058.98s/it]

Validation loss: 0.7429871473993573
Validation Accuracy: 0.8637133865892036
Validation F1-Score: 0.047393364928909956




