In [None]:
# Mount Google Drive at /content/drive; the data, checkpoint and output paths
# configured in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install transformers
import torch
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertConfig, BertForSequenceClassification

In [3]:
# ---- Hyperparameters --------------------------------------------------------
nums_epochs = 1
lr = 1e-5
batch_size = 1
max_length = 512

# ---- Compute device: GPU when CUDA is available, otherwise CPU --------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---- Google Drive locations -------------------------------------------------
# Where the model weights are stored.
model_path = '/content/drive/MyDrive/課程資料/深度學習/期末專題第31組/final-bert-model.pt'
# Where the train data (already processed by the BERT tokenizer) is stored.
train_data_path = '/content/drive/MyDrive/課程資料/深度學習/期末專題第31組/train-tensor.pt'
# Where the test data (already processed by the BERT tokenizer) is stored.
test_data_path = '/content/drive/MyDrive/課程資料/深度學習/期末專題第31組/test-tensor.pt'
# Where the test results are written.
output_path = '/content/drive/MyDrive/課程資料/深度學習/Final/results.csv'

In [4]:
class reviews_dataest(Dataset):
  """Dataset over a saved dictionary of pre-tokenized passage pairs.

  The file at `tensor_path` is read with `torch.load` and must provide the
  keys 'p1' and 'p2' (each indexable by 'input_ids', 'token_type_ids' and
  'attention_mask'), plus 'labels' when `test` is False or 'ID' when `test`
  is True.
  """

  def __init__(self, tensor_path, test=False):
    """Load the saved dictionary and keep the fields needed for this split."""
    loaded = torch.load(tensor_path)
    self.test = test
    self.p1 = loaded['p1']
    self.p2 = loaded['p2']
    if test:
      self.ID = loaded['ID']
    else:
      self.labels = loaded['labels']

  def __len__(self):
    """Number of samples: the first dimension of the first passage's input ids."""
    return self.p1['input_ids'].shape[0]

  def __getitem__(self, idx):
    """Return a 7-tuple for sample `idx`.

    Layout: (p1 input_ids, p1 token_type_ids, p1 attention_mask,
             p2 input_ids, p2 token_type_ids, p2 attention_mask, last),
    where `last` is the ID entry for a test dataset and the label otherwise.
    """
    fields = ('input_ids', 'token_type_ids', 'attention_mask')
    first = tuple(self.p1[name][idx] for name in fields)
    second = tuple(self.p2[name][idx] for name in fields)
    last = self.ID[idx] if self.test else self.labels[idx]
    return first + second + (last,)

In [5]:
class SplitBertModel(torch.nn.Module):
  """Classifies a pair of tokenized passages using one shared BERT module.

  Both passages are passed through the same `BertForSequenceClassification`
  instance, whose classifier head is replaced by a 768 -> 5 linear layer.
  The two 5-value outputs are concatenated, passed through ReLU and projected
  by a final linear layer to 2 output values.
  """

  def __init__(self):
    super().__init__()
    # Built from a default BertConfig; no `from_pretrained` call happens here.
    self.bert = BertForSequenceClassification(BertConfig())
    # Swap the stock classification head for a 5-output linear layer.
    self.bert.classifier = torch.nn.Linear(768, 5, bias=True)
    self.relu = torch.nn.ReLU()
    # Consumes the concatenation of the two 5-value passage outputs.
    self.linear = torch.nn.Linear(2 * 5, 2)

  def forward(self, p1_id, p1_tid, p1_am, p2_id, p2_tid, p2_am):
    """Return the 2-way output for a batch of passage pairs.

    `p*_id`, `p*_tid` and `p*_am` are forwarded to BERT as `input_ids`,
    `token_type_ids` and `attention_mask` respectively.
    """
    first = self.bert(input_ids=p1_id, token_type_ids=p1_tid, attention_mask=p1_am).logits
    second = self.bert(input_ids=p2_id, token_type_ids=p2_tid, attention_mask=p2_am).logits

    merged = torch.cat([first, second], dim=1)
    return self.linear(self.relu(merged))

In [None]:
# Training split: items end with a label (test=False).
train_dataset = reviews_dataest(tensor_path=train_data_path, test=False)
train_loader = DataLoader(
  dataset=train_dataset,
  batch_size=batch_size,
  shuffle=True,
)
# Display the number of training samples as the cell output.
len(train_dataset)

In [None]:
# Test split: items end with the stored ID entry instead of a label (test=True).
test_dataset = reviews_dataest(test_data_path, test=True)
# Shuffling serves no purpose at inference time; iterating in dataset order
# keeps the order of the produced predictions reproducible across runs.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Display the number of test samples as the cell output.
len(test_dataset)

In [None]:
# Training cell: start from the checkpoint at `model_path` and run
# `nums_epochs` passes over `train_loader`.

model = SplitBertModel().to(device)
model.load_state_dict(torch.load(model_path, map_location=device))

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
train_loss = np.array([])  # mean loss of each completed epoch

for epoch in range(nums_epochs):
  progress = tqdm(train_loader, leave=True)
  epoch_losses = []
  model.train()
  for batch in progress:
    # Every element of the batch (six input tensors + labels) goes to `device`.
    p1_id, p1_tid, p1_am, p2_id, p2_tid, p2_am, labels = (t.to(device) for t in batch)

    optimizer.zero_grad()
    output = model(p1_id, p1_tid, p1_am, p2_id, p2_tid, p2_am)
    loss = criterion(output, labels)
    loss.backward()
    optimizer.step()

    # Read the scalar once and reuse it for the progress bar and the log.
    loss_value = loss.item()
    progress.set_description(f'Epoch {epoch}')
    progress.set_postfix(loss=loss_value)
    epoch_losses.append(loss_value)

  train_loss = np.append(train_loss, np.mean(epoch_losses))
  # NOTE(review): the updated weights are not written anywhere in this cell
  # (a `torch.save` call used to sit here, commented out and without a target
  # path) — confirm whether the trained model should be persisted.

In [None]:
# Inference cell: run the checkpoint stored at `model_path` over `test_loader`
# and write one (ID, Generation) row per sample to `output_path`.
#
# NOTE(review): the model is rebuilt and reloaded from `model_path`, so weights
# that were only updated in memory earlier in the session are not used here —
# confirm this is intended.

with torch.no_grad():
  model = SplitBertModel().to(device)
  model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()

  # Collect per-batch arrays and join them once after the loop; calling
  # np.append per batch would re-copy the whole accumulated array every time.
  id_chunks = []
  pred_chunks = []

  for data in tqdm(test_loader, leave=True):
    # `sample_ids` (not `id`) so the builtin `id` is not rebound in the
    # notebook's global namespace.
    p1_id, p1_tid, p1_am, p2_id, p2_tid, p2_am, sample_ids = data
    p1_id = p1_id.to(device)
    p1_am = p1_am.to(device)
    p1_tid = p1_tid.to(device)

    p2_id = p2_id.to(device)
    p2_am = p2_am.to(device)
    p2_tid = p2_tid.to(device)

    id_chunks.append(sample_ids.view(-1).to('cpu').numpy())

    output = model(p1_id, p1_tid, p1_am, p2_id, p2_tid, p2_am)
    # Predicted class = index of the largest output value for each sample.
    pred_chunks.append(torch.argmax(output, dim=1).view(-1).to('cpu').numpy())

# The leading empty int array keeps the result well-defined (empty, integer
# dtype) when the loader yields no batches, and applies the same dtype
# promotion the incremental np.append version had.
ID = np.concatenate([np.array([], dtype=int), *id_chunks])
predict = np.concatenate([np.array([], dtype=int), *pred_chunks])

predict_df = pd.DataFrame({'ID': ID, 'Generation': predict})
predict_df.to_csv(output_path, index=False)