In [1]:
import ast

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import BertTokenizer, BertModel
BERT_BASE_CASED = 'bert-base-cased'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the training table. NLPDataset (defined in a later cell) reads the columns
# q_word_token, q_inter_seg, comp_sent_q, r_word_token, r_inter_seg, comp_sent_r and s.
df = pd.read_csv('./uniqueBertSumTraining.csv')

In [3]:
class NLPDataset(Dataset):
    """In-memory dataset of (query, response) pairs for sentence-level selection.

    Every row of the input frame is encoded eagerly in ``__init__``. The frame
    must provide these columns, each list-valued one being a ';'-separated string:

    * ``q_word_token`` / ``r_word_token`` -- tokens handed to the tokenizer as-is.
    * ``q_inter_seg`` / ``r_inter_seg``   -- values stored as ``token_type_ids``.
    * ``comp_sent_q`` / ``comp_sent_r``   -- per-sentence labels.
    * ``s``                               -- ``"AGREE"`` maps to 1, anything else to 0.

    Indexing returns the 7-tuple
    ``(q_data, q_clss, r_data, r_clss, s_label, q_sent_label, r_sent_label)``.
    """

    # Token id that marks a sentence start for get_cls_indices
    # (101 is the "[CLS]" id in the bert-base-cased vocabulary).
    CLS_TOKEN_ID = 101

    def __init__(self, df : pd.DataFrame) -> None:
        """Tokenize and parse every row of ``df`` into the per-field lists."""
        self.tokenizer : BertTokenizer = BertTokenizer.from_pretrained(BERT_BASE_CASED)
        self.q_datas = []
        self.q_clss_list = []
        self.r_datas = []
        self.r_clss_list = []
        self.s_labels = []
        self.q_sent_labels = []
        self.r_sent_labels = []
        # iterrows() walks the rows in positional order, so this works for any
        # index. The previous `df.iloc[label]` lookup only worked when the index
        # happened to be the default 0..n-1 range (e.g. it broke after filtering).
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Constructing Dataset..."):
            q_data, q_clss = self._encode(row['q_word_token'], row['q_inter_seg'])
            r_data, r_clss = self._encode(row['r_word_token'], row['r_inter_seg'])
            self.q_datas.append(q_data)
            self.q_clss_list.append(q_clss)
            self.r_datas.append(r_data)
            self.r_clss_list.append(r_clss)
            self.s_labels.append(1 if row['s'] == "AGREE" else 0)
            self.q_sent_labels.append(self._parse_list(row['comp_sent_q']))
            self.r_sent_labels.append(self._parse_list(row['comp_sent_r']))

    @staticmethod
    def _parse_list(field : str) -> list:
        """Parse a ';'-separated cell such as ``"0;1;1"`` into ``[0, 1, 1]``.

        Uses ``ast.literal_eval`` rather than ``eval``: it yields the same values
        for literal data but cannot execute code embedded in a CSV cell.
        Raises ``ValueError``/``SyntaxError`` if the cell is not a list of literals.
        """
        return ast.literal_eval("[" + field.replace(";", ",") + "]")

    def _encode(self, tokens : str, segments : str):
        """Encode one ';'-separated token string and attach its segment ids.

        Returns ``(encoding, cls_indices)`` where ``encoding`` is the tokenizer
        output with ``token_type_ids`` replaced by the parsed ``segments``.
        """
        # Special tokens are not added here: whatever markers are needed must
        # already be present in the token string itself.
        data = self.tokenizer.encode_plus(tokens.split(sep=';'), return_token_type_ids=False, add_special_tokens=False)
        clss = self.get_cls_indices(data['input_ids'])
        data['token_type_ids'] = self._parse_list(segments)
        return data, clss

    def get_cls_indices(self, target) -> list:
        """Return the positions in ``target`` whose value equals ``CLS_TOKEN_ID``."""
        return [pos for pos, token_id in enumerate(target) if token_id == self.CLS_TOKEN_ID]

    def __len__(self):
        """Number of encoded rows."""
        return len(self.s_labels)

    def __getitem__(self, index):
        """Return the 7-tuple of encoded fields for row ``index``."""
        return (
            self.q_datas[index],
            self.q_clss_list[index],
            self.r_datas[index],
            self.r_clss_list[index],
            self.s_labels[index],
            self.q_sent_labels[index],
            self.r_sent_labels[index],
            )

# Build the dataset eagerly: NLPDataset.__init__ tokenizes every row of df up front.
dataset = NLPDataset(df = df)

Constructing Dataset...: 100%|██████████| 7855/7855 [00:02<00:00, 3292.01it/s]


In [4]:
class SentenceSelector(torch.nn.Module):
    """Scoring head: maps each 768-dimensional vector to one raw (unnormalized) score."""

    def __init__(self) -> None:
        super().__init__()
        # The attribute name `linear` is part of the state_dict keys, so it must
        # stay stable for saved checkpoints to keep loading.
        self.linear = torch.nn.Linear(in_features=768, out_features=1)

    def forward(self, clss_hiddens):
        """Apply the linear layer over the last dimension of ``clss_hiddens``."""
        scores = self.linear(clss_hiddens)
        return scores
class BertSumExtModel(torch.nn.Module):
    """BERT encoder followed by a shared SentenceSelector head.

    The same encoder and the same head are applied to both the query and the
    response input; ``forward`` returns one tensor of raw scores for each.
    """

    def __init__(self) -> None:
        super().__init__()
        # return_dict=False makes the encoder return a plain tuple, which
        # forward() indexes positionally.
        self.bertModel = BertModel.from_pretrained(BERT_BASE_CASED, return_dict = False)
        self.sentenceSelector = SentenceSelector()

    @staticmethod
    def _gather_clss(hidden, clss):
        """Pick the rows at positions ``clss`` from the first sequence in ``hidden``."""
        # Only batch element 0 is read, so callers are expected to pass a
        # batch of size one.
        return hidden[0, clss]

    def forward(self, q_data, q_clss, r_data, r_clss):
        """Score the selected positions of the query and of the response.

        ``q_data`` / ``r_data`` are keyword arguments for the encoder;
        ``q_clss`` / ``r_clss`` index into the sequence dimension of its first
        output. Returns the tuple ``(q_out, r_out)`` of selector outputs.
        """
        # Run the encoder on both inputs first, then the head on both.
        hidden_q = self.bertModel(**q_data)[0]
        hidden_r = self.bertModel(**r_data)[0]
        q_scores = self.sentenceSelector(self._gather_clss(hidden_q, q_clss))
        r_scores = self.sentenceSelector(self._gather_clss(hidden_r, r_clss))
        return q_scores, r_scores

In [5]:
# Instantiate the model and move it to the GPU; .cuda() requires a CUDA device.
model = BertSumExtModel().cuda()
# BCEWithLogitsLoss applies the sigmoid internally, so the model outputs raw scores.
loss_fn = torch.nn.BCEWithLogitsLoss()
# Disabled: loading weights from an earlier checkpoint file. Presumably used to
# resume from a previous run -- confirm before re-enabling.
# model.load_state_dict(torch.load('./BertSum_WO_Encoder(2).pt'))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
# AdamW over every parameter of the model (BERT encoder and selector head alike);
# the second positional argument is the base learning rate.
model_opt = torch.optim.AdamW(model.parameters(), 3.68e-5)

In [7]:
# Linear warm-up: the LR starts at 0.5x the base rate and reaches 1.0x after 4
# scheduler steps. The training loop steps the scheduler once per epoch.
lr_sc = torch.optim.lr_scheduler.LinearLR(model_opt, start_factor=0.5, total_iters = 4)

In [8]:
# Number of passes over the dataset.
NUM_EPOCHS = 5

# Create every tensor directly on the device the model already lives on,
# instead of building it on the CPU and copying it over afterwards.
device = next(model.parameters()).device

# BertModel.from_pretrained() returns the model in eval mode (dropout disabled).
# Switch to training mode explicitly, otherwise fine-tuning runs without dropout.
model.train()

for epoch in range(NUM_EPOCHS):
    total_loss = 0.
    # Learning rate in effect for this epoch; the scheduler is stepped once per
    # epoch at the bottom of the loop.
    currentLR = lr_sc.get_last_lr()[0]
    train_process = tqdm(dataset, desc=f"Epoch {epoch + 1}/{NUM_EPOCHS}")
    # One sample per step: each field is wrapped in a list to add a batch
    # dimension of size 1. `batch` is the 1-based step counter used for the
    # running average. The stance label is unused here; it is bound to a named
    # variable because assigning to `_` would clobber IPython's last-output variable.
    for batch, (q_data, q_clss, r_data, r_clss, _s_label, q_sent_label, r_sent_label) in enumerate(train_process, start = 1):
        q_data = {k: torch.tensor([v], device=device) for k, v in q_data.items()}
        r_data = {k: torch.tensor([v], device=device) for k, v in r_data.items()}
        q_clss = torch.tensor([q_clss], device=device)
        r_clss = torch.tensor([r_clss], device=device)
        q_target = torch.tensor([q_sent_label], dtype=torch.float32, device=device)
        r_target = torch.tensor([r_sent_label], dtype=torch.float32, device=device)

        model_opt.zero_grad()
        q_pred, r_pred = model(q_data, q_clss, r_data, r_clss)
        # Flatten the scores to (1, n_sentences) so they line up with the targets.
        # reshape() copes with non-contiguous inputs by itself.
        loss_q = loss_fn(q_pred.reshape(1, -1), q_target)
        loss_r = loss_fn(r_pred.reshape(1, -1), r_target)
        t_loss = loss_q + loss_r
        t_loss.backward()
        model_opt.step()

        total_loss += t_loss.item()
        train_process.set_postfix({"AVG_LOSS" : total_loss/ batch, "CURRENT_LR" : currentLR})
    lr_sc.step()

  4%|▍         | 346/7855 [04:35<1:39:30,  1.26it/s, AVG_LOSS=0.946, CURRENT_LR=1.84e-5] 


KeyboardInterrupt: 

In [None]:
# Persist only the learned weights (state_dict), not the full module object.
torch.save(model.state_dict(), './BertSum_WO_Encoder(3).pt')