In [3]:
import json
import torch
import pandas as pd
from tokenizers.implementations import BertWordPieceTokenizer
from Bert import Bert
from torch.nn.utils.rnn import pad_sequence

In [4]:
df = pd.read_csv('../bert_impl_data/ner_datasetreference.csv', encoding='Windows-1252')

# 查看数据的前几行
df.fillna(method='ffill', inplace=True)
groups = df.groupby('Sentence #')
sentence = []
labels = []
Class = set([])
for _, i in groups:
    sentence.append(' '.join(i['Word']))
    labels.append(' '.join(i['Tag']))
    for k in i['Tag']:
        Class.add(k)
Class_new = {i: idx for idx, i in enumerate(Class)}
with open('../bert_impl_data/ner_class.json', 'w') as fp:
    id_class = {val: key for key, val in Class_new.items()}
    fp.write(json.dumps({'word_id': Class_new, 'id_word': id_class}, indent=4))


In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

_tokenizer = BertWordPieceTokenizer("../custom/vocab.txt")
_embedding_dim = 384
_hidden_size = 3072
_num_head = 12
_out_dim = 512
max_epoch = 20
batch = 140
_num_layers = 12
vocab_size = _tokenizer.get_vocab_size()
bert = Bert(_embedding_dim, _hidden_size, _num_head, 128, _num_layers, _tokenizer)
bert.load_state_dict(torch.load("../bert_impl_weights/bert.pth"))
bert.train()

In [6]:
max_len = 0
new_sentence = []
new_labels = []
for idx, i in enumerate(sentence):
    if len(sentence[idx]) <= 127:
        new_sentence.append(torch.tensor(_tokenizer.encode(i).ids))
        cache = labels[idx].split(' ')
        new_cache = [Class_new[k] for k in cache]
        new_cache.insert(0, -1)
        new_cache.append(-1)
        new_labels.append(torch.tensor(new_cache))
        if len(sentence[idx]) > max_len:
            max_len = len(sentence[idx])
new_sentence.append(torch.tensor([0] * (max_len + 1)))
new_labels.append(torch.tensor([0] * (max_len + 1)))
new_labels = pad_sequence(new_labels, batch_first=True, padding_value=-1)[:-1]
new_sentence = pad_sequence(new_sentence, batch_first=True)[:-1]

In [7]:
from layers import Train
from torch.nn import Linear
import time


class Emo_trainer(Train):
    def __init__(self, model, optimizer):
        super().__init__(model, optimizer)

    def down_stream(self, batch_size, max_epoch, layer: torch.nn.Module, log=True, log_dir=None, Tensorboard_reloadInterval=30,
                    log_file_name='', monitor=True, pick_params=False):
        from torch.cuda.amp import GradScaler, autocast
        begin = time.time()
        max_iter = 0
        loss = 0
        scaler = GradScaler()
        if log:
            self.open_tensorboard(log_dir, Tensorboard_reloadInterval, f"({log_file_name})")
        COUNT = 0
        loss_func = torch.nn.CrossEntropyLoss(ignore_index=-1)
        path = None
        min_loss = 20
        best_loss = 20

        def run(train):
            try:
                score = self._model.forward(train, mask.to(device=device))
                score = layer(score).permute(0, 2, 1)
                score = loss_func.forward(score, answer)
                return score
            except Exception as e:
                raise e

        train_question = new_sentence[:int(len(new_sentence) * 0.8)].to(device=device)
        test_question = new_sentence[int(len(new_sentence) * 0.8):].to(device=device)
        train_answer = new_labels[:int(len(new_labels) * 0.8)].to(device=device)
        test_answer = new_labels[int(len(new_labels) * 0.8):].to(device=device)
        for epoch in range(max_epoch):
            iters = 0
            max_iter = len(train_question) // batch_size
            average_loss = 0
            for i in range(max_iter):
                start = i * batch_size + 1
                self._optimizer.zero_grad()
                with autocast():
                    batch_question = train_question[start:(i + 1) * batch_size + 1].to(device=device)
                    answer = train_answer[start:(i + 1) * batch_size + 1].to(device=device)
                    mask = batch_question != 0
                    loss = run(batch_question)
                scaler.scale(loss).backward()
                scaler.step(self._optimizer)
                scaler.update()
                loss = loss.detach_().item()
                average_loss += loss

                self.print_result((epoch, max_epoch), (iters, max_iter), loss, begin=begin, timing=True)
                iters += 1
            if self.writer:
                COUNT += 1
                try:
                    if average_loss / max_iter < min_loss:
                        min_loss = average_loss
                        try:
                            torch.save(layer.state_dict(), '../bert_impl_weights/down_stream_bert_ner_layer.pth')
                            torch.save(self._model.state_dict(), '../bert_impl_weights/down_stream_bert_ner.pth')
                        except Exception as e:
                            print(e)
                except ZeroDivisionError:
                    print(average_loss, max_iter, path, len(train_question), batch_size)
                correctness = self._model.down_stream(test_question, test_answer, batch_size, layer)
                self.writer.add_scalar("loss", average_loss / max_iter, COUNT)
                self.writer.add_scalar("correctness", correctness, COUNT)
            self.print_result((epoch, max_epoch), (max_iter, max_iter), loss, begin=begin, timing=True)
            try:
                if best_loss >= average_loss:
                    torch.save(layer.state_dict(), '../bert_impl_weights/down_stream_bert_ner_layer.pth')
                    torch.save(self._model.state_dict(), '../bert_impl_weights/down_stream_bert_ner.pth')
                    best_loss = average_loss
                torch.save(layer.state_dict(), '../bert_impl_weights/down_stream_bert_ner_layer.pth')
                torch.save(self._model.state_dict(), '../bert_impl_weights/down_stream_bert_ner.pth')
            except Exception as e:
                print(e)
        self.print_result((max_epoch, max_epoch), (max_iter, max_iter), loss, begin=begin, timing=True)
        if self.writer is not None and self.tensorboard_process is not None:
            self.writer.close()
            self.tensorboard_process.terminate()
layers = Linear(384, len(Class), device=bert.device)
optimizer = torch.optim.Adam(list(layers.parameters()) + list(bert.parameters()), lr=1e-4)
trainer = Emo_trainer(bert, optimizer)
trainer.add_bar('Epoch', 'Iter')
trainer.add_metrics('loss', float)
trainer.down_stream(batch, max_epoch, layers, log_dir="Bert_down_stream_ner", log=True,
                    log_file_name="Bert_down_stream_ner", monitor=False)


copy to run: tensorboard --logdir=C:\Users\123\PycharmProjects\torch-models\bert_impl\Bert_down_stream_ner --port=6006 --reload_interval=30
 ▏Epoch: │████████████████████│ 100.00% ▏Iter: │████████████████████│ 100.00% ▏Time: 5min57s ▏loss: 0.03161 