In [1]:
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import torch.nn as nn
from transformers import BertModel
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import sys
sys.path.append("../Public/")
from models import SentimentClassifier, BertDataset
from utils import get_accuracy
import requests
import time


In [2]:
# immutable parameters
device = "cuda"
criterion = nn.BCEWithLogitsLoss()
dataName = "Yelp"
fileName = f"{dataName}-FineTune"
targetData = "imdb"
model_dict_name = f"../{dataName}/{dataName}-best.pt"
save_dict_name = f"{fileName}-best.pt"
apiKey = "E8qq5xWZ95iyQZB6hSezKV"
title =f"{fileName} notebook completed!"
NUM_WORKERS = 12

In [3]:
# 导入模型
model = SentimentClassifier()
model = model.to(device)
model.load_state_dict(torch.load(model_dict_name))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [4]:
BATCH_SIZE = 128
MAX_LEN = 256
LEARNING_RATE = 0.002
EPOCHS = 4

In [5]:
# 导入数据集和优化器
train_set = BertDataset(f'../Data/{targetData}/train.csv', maxlen = MAX_LEN )
val_set = BertDataset(f'../Data/{targetData}/val.csv', maxlen = MAX_LEN )
train_loader = DataLoader(train_set, batch_size = BATCH_SIZE,shuffle=True,num_workers=NUM_WORKERS)
val_loader = DataLoader(val_set, batch_size = BATCH_SIZE,shuffle=True,num_workers=NUM_WORKERS)
test_set = BertDataset(f'../Data/{targetData}/test.csv', maxlen = MAX_LEN )
test_loader = DataLoader(test_set, batch_size = BATCH_SIZE,shuffle=True,num_workers=NUM_WORKERS)
# 优化器
optimizer = optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [6]:
names_list = [fileName]
result_df = pd.DataFrame(columns=['V_Loss','V_Acc','T_Loss','T_Acc'],index=names_list)
data_list = []

In [7]:
best_val_acc = 0
start_time = time.time()

for epoch in range(EPOCHS):

    train_loss = 0.0
    train_acc=0.0
    val_loss=0
    val_acc=0.0
    epoch_start_time = time.time()

    model.train()
    for i,data in enumerate(train_loader):
        input_ids,attention_mask,labels=[elem.to(device) for elem in data]
        #优化器置零
        optimizer.zero_grad()
        #得到模型的结果
        out=model(input_ids,attention_mask)
        #计算误差
        loss=criterion(out.squeeze(-1),labels.float())
        train_loss += loss.item()
        #误差反向传播
        loss.backward()
        #更新模型参数
        optimizer.step()
        #计算acc 
        out=out.detach().cpu().numpy()
        labels=labels.detach().cpu().numpy()
        train_acc+=get_accuracy(out,labels)
    

    train_acc /= len(train_loader)
    train_loss /= len(train_loader)
    print(f"train {epoch+1}/{EPOCHS} epochs Loss: {train_loss:3.6f}, Acc: {train_acc:3.6f}, times: {(time.time()-epoch_start_time):.2f}s")


    epoch_start_time = time.time()
    model.eval()
    with torch.no_grad():
        for j,batch in enumerate(val_loader):
            val_input_ids,val_attention_mask,val_labels=[elem.to(device) for elem in batch]
            pred=model(val_input_ids,val_attention_mask)
            loss=criterion(pred.squeeze(-1),val_labels.float())
            pred=pred.detach().cpu().numpy()
            val_labels=val_labels.detach().cpu().numpy()
            val_acc += get_accuracy(pred,val_labels)
            val_loss += loss.item()

    val_acc /= len(val_loader)
    val_loss /= len(val_loader)
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_val_loss = val_loss
        best_state_dict = model.state_dict()

    print(f"val   {epoch+1}/{EPOCHS} epochs Loss: {val_loss:3.6f}, Acc: {val_acc:3.6f}, times: {(time.time()-epoch_start_time):.2f}s")
    print(f"{epoch+1}/{EPOCHS} epochs: Best_Acc: {best_val_acc:3.6f}")
    

torch.save(best_state_dict,save_dict_name)
print(f"{fileName} on {targetData} {EPOCHS}: Best_Acc: {best_val_acc:3.6f}, times: {(time.time()-start_time):.2f}s")


result_df.loc[fileName,'V_Loss']=best_val_loss
result_df.loc[fileName,'V_Acc']=best_val_acc


train 1/4 epochs Loss: 0.451176, Acc: 0.779757, times: 144.23s
val   1/4 epochs Loss: 0.397007, Acc: 0.820312, times: 38.28s
1/4 epochs: Best_Acc: 0.820312
train 2/4 epochs Loss: 0.408582, Acc: 0.804439, times: 144.18s
val   2/4 epochs Loss: 0.396509, Acc: 0.818945, times: 36.92s
2/4 epochs: Best_Acc: 0.820312
train 3/4 epochs Loss: 0.406386, Acc: 0.806777, times: 144.54s
val   3/4 epochs Loss: 0.377993, Acc: 0.819922, times: 37.29s
3/4 epochs: Best_Acc: 0.820312
train 4/4 epochs Loss: 0.400311, Acc: 0.808569, times: 145.17s
val   4/4 epochs Loss: 0.382539, Acc: 0.840430, times: 36.45s
4/4 epochs: Best_Acc: 0.840430
Yelp-FineTune on imdb 4: Best_Acc: 0.840430, times: 735.44s


In [8]:
model = SentimentClassifier()
model.load_state_dict(torch.load(save_dict_name))
model.to(device)
model.eval()
test_acc = 0
test_loss = 0

with torch.no_grad():
    for j,batch in enumerate(test_loader):
        test_input_ids,test_attention_mask,test_labels=[elem.to(device) for elem in batch]
        pred=model(test_input_ids,test_attention_mask)
        loss=criterion(pred.squeeze(-1),test_labels.float())
        pred=pred.detach().cpu().numpy()
        test_labels=test_labels.detach().cpu().numpy()
        test_acc += get_accuracy(pred,test_labels)
        test_loss += loss.item()

test_acc /= len(test_loader)
test_loss /= len(test_loader)

print(f'on {targetData} test set: Loss: {test_acc:3.6f} , Acc: {test_acc:3.6f}')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


on imdb test set: Loss: 0.856445 , Acc: 0.856445


In [9]:
result_df.loc[fileName,'T_Loss']=test_loss
result_df.loc[fileName,'T_Acc']=test_acc
result_df.to_csv(f"{fileName}-result.csv")

In [10]:
notebookName = f"Bert {fileName}"
context = f"notebook {notebookName} :  is completed"
requests.get(f"https://api.day.app/{apiKey}/{title}/{context}?sound=anticipate")

<Response [200]>