In [1]:
from transformers import BertModel, BertTokenizer
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import metrics

In [2]:
class bertconfig():
    def __init__(self):
        self.bert_path = './bert_pretrain'
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_path)
        self.class_list = [x.strip() for x in open('./THUCNews/data/class.txt').readlines()]
        self.pad_size = 32
        self.batch_size = 128
        self.device = 'cpu'
        self.hidden_size = 768
        self.num_classes = len(self.class_list)
        self.learning_rate = 5e-5
        self.num_epochs = 10

In [3]:
config = bertconfig()
config.num_classes

10

In [4]:
def build_dataset(pad_size):
    def load_data(path,pad_size):
        input = []
        label = []
        with open(path, 'r', encoding='UTF-8') as f:
            for line in tqdm(f):
                content = line.strip().split('\t')[0]
                label = line.strip().split('\t')[1]
                token = config.tokenizer.tokenize(content)
                token = ['CLS']+token
                token_ids = config.tokenizer.convert_tokens_to_ids(token)
                seq_len = len(token_ids)
                if len(token_ids)<pad_size:
                    token_ids = token_ids+[0]*(pad_size-len(token_ids))
                    mask = [1]*seq_len+[0]*(pad_size-seq_len)
                else:
                    token_ids = token_ids[:pad_size]
                    seq_len = pad_size
                    mask = [1]*seq_len
                input.append((token_ids,int(label),seq_len,mask))
        return input
    train_data = load_data('./THUCNews/data/train.txt',pad_size)
    test_data = load_data('./THUCNews/data/test.txt',pad_size)
    dev_data = load_data('./THUCNews/data/dev.txt',pad_size)
    return train_data,test_data,dev_data

In [5]:
train_data,test_data,dev_data = build_dataset(config.pad_size)

33590it [00:08, 3876.14it/s]
10000it [00:02, 3971.21it/s]
10000it [00:02, 3802.26it/s]


In [6]:
class DatasetIterator(object):
    def __init__(self, batches, batch_size, device):
        self.batch_size = batch_size
        self.batches = batches
        self.n_batches = len(batches) // batch_size
        self.residue = False  # 记录batch数量是否为整数
        if len(batches) % self.n_batches != 0:
            self.residue = True
        self.index = 0
        self.device = device
        
    def _to_tensor(self, datas):
        x = torch.LongTensor([_[0] for _ in datas]).to(self.device)
        y = torch.LongTensor([_[1] for _ in datas]).to(self.device)

        # pad前的长度(超过pad_size的设为pad_size)
        seq_len = torch.LongTensor([_[2] for _ in datas]).to(self.device)
        mask = torch.LongTensor([_[3] for _ in datas]).to(self.device)
        return (x, seq_len, mask), y

    def __next__(self):
        if self.residue and self.index == self.n_batches:
            batches = self.batches[self.index * self.batch_size: len(self.batches)]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

        elif self.index >= self.n_batches:
            self.index = 0
            raise StopIteration
        else:
            batches = self.batches[self.index * self.batch_size: (self.index + 1) * self.batch_size]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

    def __iter__(self):
        return self

    def __len__(self):
        if self.residue:
            return self.n_batches + 1
        else:
            return self.n_batches


def build_iterator(dataset, config):
    iter = DatasetIterator(dataset, config.batch_size, config.device)
    return iter

In [7]:
train_iter = build_iterator(train_data, config)

In [8]:
for i, (trains, labels) in enumerate(train_iter):
    print(trains)

(tensor([[ 100,  704, 1290,  ...,    0,    0,    0],
        [ 100,  697, 1921,  ...,    0,    0,    0],
        [ 100,  691,  126,  ...,    0,    0,    0],
        ...,
        [ 100,  783, 7183,  ...,    0,    0,    0],
        [ 100, 2458, 4669,  ...,    0,    0,    0],
        [ 100, 3136, 5509,  ...,    0,    0,    0]]), tensor([19, 23, 21, 25, 22, 21, 16, 22, 16, 12, 21, 23, 22, 16,  8, 17, 20, 24,
         8, 10, 18, 16, 24, 21, 18, 15, 11, 21, 19, 19, 22, 22, 17, 23, 24, 17,
        13, 18, 23, 19, 23, 21, 23, 21, 20, 14, 18, 16, 18, 24, 16, 23, 21, 17,
        16, 13, 23, 20, 21, 21, 13, 23, 18, 15, 25, 17, 21, 23, 23, 14, 20, 20,
        18, 17, 23, 15, 23, 21, 20, 15, 22, 21, 22, 20, 20, 15, 13, 21, 22, 15,
        21, 21, 23, 15, 23, 19, 17, 18, 14, 21, 14, 16, 21, 12, 17, 23, 15, 22,
        16, 16, 16, 18, 22, 16, 25, 17, 19, 18, 15, 18, 13, 22, 21, 14, 22, 17,
        16, 22]), tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0,

(tensor([[ 100, 1333, 3779,  ...,    0,    0,    0],
        [ 100, 1079, 5885,  ...,    0,    0,    0],
        [ 100, 4511, 2094,  ...,    0,    0,    0],
        ...,
        [ 100, 4289,  689,  ...,    0,    0,    0],
        [ 100, 8166, 2399,  ...,    0,    0,    0],
        [ 100, 7481, 2094,  ...,    0,    0,    0]]), tensor([14, 18, 16, 21, 19, 21, 18, 17, 22, 17, 21, 19, 22, 19, 16, 26, 17, 23,
        20, 17, 22, 19, 20, 19, 22, 19, 12, 20, 21, 17, 12, 17, 20, 21, 20, 20,
        17, 19, 25, 19, 19, 17, 16, 18, 22, 19, 19, 17, 18, 17, 18, 21, 21, 18,
        25, 20, 17, 18, 21, 21, 21, 15, 10, 21, 16, 17, 18, 20, 15, 21, 21, 20,
        17, 17, 21, 16, 24, 18, 13, 16, 21, 20, 23, 18, 20, 19, 17, 20, 19, 15,
        19, 22, 18, 16, 14, 15, 23, 18, 18, 23, 17, 25, 16, 22, 18, 19, 18, 17,
        16, 24, 18, 19, 17, 23, 21, 21, 18, 21, 22, 21, 22, 16, 17, 20, 18, 17,
        17, 17]), tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0,

(tensor([[ 100, 4143, 2157,  ...,    0,    0,    0],
        [ 100, 1367, 6163,  ...,    0,    0,    0],
        [ 100, 1045, 7213,  ...,    0,    0,    0],
        ...,
        [ 100, 1313, 2428,  ...,    0,    0,    0],
        [ 100, 2571, 6380,  ...,    0,    0,    0],
        [ 100, 7931, 2496,  ...,    0,    0,    0]]), tensor([22, 23, 15, 13, 20, 20, 19, 23, 18, 18, 19, 24, 18, 20, 22, 16, 16, 21,
        22, 16, 22, 21, 18, 18, 22, 13, 23, 20, 17, 15, 20, 17, 17, 18, 17, 16,
        21, 21, 22, 12, 15, 19, 22, 19, 21, 16, 13, 16, 25, 21, 24, 21, 17, 22,
        15, 14, 18, 17, 23, 24, 22, 24, 18, 17, 20, 22, 20, 15, 17, 14, 22, 15,
        17, 19, 23, 18, 15, 14, 18, 16, 15, 23, 19, 19, 19, 17, 20, 23, 21, 23,
        15, 18, 18, 18, 24, 18, 22, 21, 19, 24, 16, 24, 23, 17, 23, 19, 20, 17,
        21, 15, 17, 22, 17, 12, 20, 17, 21, 21, 18, 19, 20, 23, 21, 24, 24, 16,
        17, 22]), tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0,

(tensor([[ 100, 1952, 2349,  ...,    0,    0,    0],
        [ 100, 4511, 2094,  ...,    0,    0,    0],
        [ 100, 1068,  754,  ...,    0,    0,    0],
        ...,
        [ 100, 9226, 5101,  ...,    0,    0,    0],
        [ 100,  769, 3211,  ...,    0,    0,    0],
        [ 100, 1957, 1036,  ...,    0,    0,    0]]), tensor([20, 14, 21, 22, 17, 12, 10, 16, 16, 17, 17, 11, 19, 20, 17, 19, 23, 19,
        16, 16, 22, 15, 16, 16, 18, 22, 22, 15, 17, 23, 25, 20, 23, 25, 20, 20,
        19, 19, 18, 21, 15, 22, 15, 17, 15, 23, 22, 22, 13, 18, 12, 14, 18, 17,
        18, 19, 20, 22, 22, 16, 15, 21, 25, 16, 12, 23, 23, 17, 23, 21, 18, 20,
        22, 19, 23, 21, 19, 16, 18, 20, 18, 22, 19, 19, 22, 11, 20, 22, 22, 21,
        20, 15, 18, 18, 15, 21, 18, 23, 12, 13, 17, 23, 14, 17, 21, 13, 22, 23,
        20, 22, 20, 15, 19, 18, 18, 23, 25, 23, 17, 23, 19, 22, 15, 14, 20, 25,
        19, 18]), tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0,

In [9]:
class Model(nn.Module): 

    def __init__(self, config):
        super(Model, self).__init__()
        self.bert = BertModel.from_pretrained(config.bert_path)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.fc = nn.Linear(config.hidden_size, config.num_classes)

    def forward(self, x):
        context = x[0]  # 输入的句子
        mask = x[2]  # 对padding部分进行mask，和句子一个size，padding部分用0表示，如：[1, 1, 1, 1, 0, 0]
        _ = self.bert(context, attention_mask=mask)
        out = self.fc(_[1])
        return out

In [10]:
model = Model(config).to(config.device)
model.train()

Some weights of the model checkpoint at ./bert_pretrain were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      

In [11]:
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

In [11]:
for epoch in range(config.num_epochs):
    idx = 0
    for i, (trains, labels) in tqdm(enumerate(train_iter)):
        outputs = model(trains)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()
        optimizer.step()
        if idx%10==0:
            true = labels.data.cpu()
            predic = torch.max(outputs.data, 1)[1].cpu()
            train_acc = metrics.accuracy_score(true, predic)
            print(train_acc)
        idx+=1
        #    model.eval()
        #    loss_total = 0
        #    predict_all = np.array([], dtype=int)
        #    labels_all = np.array([], dtype=int)

1it [00:25, 25.47s/it]

0.09375


2it [01:17, 38.93s/it]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\lzd05\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-3a3929059d29>", line 6, in <module>
    loss.backward()
  File "C:\Users\lzd05\anaconda3\lib\site-packages\torch\_tensor.py", line 255, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "C:\Users\lzd05\anaconda3\lib\site-packages\torch\autograd\__init__.py", line 147, in backward
    Variable._execution_engine.run_backward(
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\lzd05\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2061, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\lzd05\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-3a3929059d29>", line 6, in <module>
    loss.backward()
  File "C:\Users\lzd05\anaconda3\lib\site-packages\torch\_tensor.py", line 255, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "C:\Users\lzd05\anaconda3\lib\site-packages\torch\autograd\__init__.py", line 147, in backward
    Variable._execution_engine.run_backward(
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\lzd05\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2061, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception

TypeError: object of type 'NoneType' has no len()