In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from model_builder import Summarizer
from transformers import AdamW
import torch.nn as nn


In [2]:
# bert_data_path = os.path.abspath("./dataset/bert_data_for_BERTSUM/TC/")
bert_data_train = torch.load("./dataset/bert_data_for_BERTSUM/TC/PTS.train.all.bert.pt")
print(len(bert_data_train))

185


In [11]:
class DocDataset(Dataset):
    def __init__(self, mode, list_of_dict):
        assert mode in ["train", "test"]
        self.mode = mode
        self.list_of_dict = list_of_dict
            
    def __getitem__(self,idx):
        inputid = self.list_of_dict[idx]['src']
        tokentype = self.list_of_dict[idx]['segs']
        attentionmask = self.list_of_dict[idx]['att_msk']
        label = None
        if (self.mode == "train"):
            label = self.list_of_dict[idx]['labels']
            label = torch.tensor(label)
        return inputid , tokentype , attentionmask, label
    
    def __len__(self):
        return len(self.list_of_dict)

In [12]:
""" model setting (training) """
BATCH_SIZE = 4
trainSet = DocDataset("train", bert_data_train)
# print(trainSet[0])
trainLoader = DataLoader(trainSet, batch_size=BATCH_SIZE)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:", device)
model = Summarizer("classifier")
optimizer = AdamW(model.parameters(), lr=1e-5) # AdamW = BertAdam
loss_fct = nn.BCELoss()

device: cuda:0


In [5]:
print("""
name            module
----------------------""")
for name, module in model.named_children():
    if name == "bert":
        for n, _ in module.named_children():
            print(f"{name}:{n}")
#             print(_)
    else:
        print("{:15} {}".format(name, module))


name            module
----------------------
bert:embeddings
bert:encoder
bert:pooler
encoder         Classifier(
  (linear1): Linear(in_features=768, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [13]:
""" training """
from datetime import datetime,timezone,timedelta

model = model.to(device)
model.train()
EPOCHS = 10
MODEL_PATH = "./model/classifier"
dt1 = datetime.utcnow().replace(tzinfo=timezone.utc)
dt2 = dt1.astimezone(timezone(timedelta(hours=8))) # 轉換時區 -> 東八區
print(dt2)
for epoch in range(EPOCHS):
    running_loss = 0.0
    binary_running_loss = 0.0

    for data in trainLoader:
        tokens_tensors, segments_tensors, masks_tensors, \
        labels = [t.to(device) for t in data]

        # 將參數梯度歸零
        optimizer.zero_grad()

        # forward pass
        sent_score = model(x=tokens_tensors, 
                      segs=segments_tensors, 
                      attention_mask=masks_tensors)
        
        loss = loss_fct(sent_score, labels)

        # backward
        loss.backward()
        optimizer.step()

        # 紀錄當前 batch loss
        running_loss += loss.item()
        print("\r running_loss: " + str(running_loss), end="")
        
    CHECKPOINT_NAME =  MODEL_PATH +'.EPOCHES.' + str(epoch) + '.pkl' 
    torch.save(model.state_dict(), CHECKPOINT_NAME)

    dt1 = datetime.utcnow().replace(tzinfo=timezone.utc)
    dt2 = dt1.astimezone(timezone(timedelta(hours=8))) # 轉換時區 -> 東八區
    print('%s\t[epoch %d] loss: %.3f' %
          (dt2, epoch + 1, running_loss))

2020-09-03 21:24:08.600250+08:00


RuntimeError: stack expects each tensor to be equal size, but got [17] at entry 0 and [15] at entry 1