In [1]:
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import torch.nn as nn
from transformers import BertModel
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import sys
sys.path.append("../Public/")
from models import SentimentClassifier, BertDataset
from utils import get_accuracy,full_permutation
import requests
import time
from collections import OrderedDict

In [2]:
import os

# Run on the GPU when one is available, otherwise fall back to the CPU so the
# notebook still executes on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Binary cross-entropy on raw logits (the sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()
# Dataset name; used to build data paths and all output file names.
dataName = "Yelp"
# File the best checkpoint of the hyper-parameter search is written to.
model_dict_name = f"{dataName}-best.pt"
# Key for the completion-notification service called in the last cell.
# Prefer the NOTIFY_API_KEY environment variable over the literal fallback.
# NOTE(review): the fallback key is committed with the notebook -- rotate it
# and remove the literal once the environment variable is in place.
apiKey = os.environ.get("NOTIFY_API_KEY", "E8qq5xWZ95iyQZB6hSezKV")
# Title of the completion notification.
title =f"{dataName} notebook completed!"

In [3]:
# Build the classifier and place it on the configured device in one step.
model = SentimentClassifier().to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Search grid: every key maps to the list of candidate values for one
# hyper-parameter.
# NOTE(review): the keys here are plural ('BATCH_SIZES', ..., "EPOCHSS") while
# later cells read the columns 'BATCH_SIZE', 'MAX_LEN', 'LEARNING_RATE' and
# 'EPOCHS' from `table`; presumably full_permutation derives the column names
# from these keys -- confirm against utils.full_permutation.
hyperParams = OrderedDict([
    ('BATCH_SIZES', [64]),
    ('MAX_LENS', [128]),
    ('LEARNING_RATES', [1e-5, 1e-4, 1e-3]),
    ("EPOCHSS", [100]),
])
# `table` is indexed with .loc[row, column] by the cells below, one row per
# hyper-parameter combination.
table = full_permutation(hyperParams)

In [None]:

# Number of DataLoader worker processes. This name was referenced by the
# loaders below without ever being defined; 0 (load in the main process) is
# the DataLoader default.
NUM_WORKERS = 0


def _snapshot_state(net):
    """Return a detached CPU copy of ``net.state_dict()``.

    ``state_dict()`` returns references to the live parameter tensors, so a
    checkpoint that must survive further optimizer steps has to be cloned.
    """
    return OrderedDict(
        (name, tensor.detach().cpu().clone())
        for name, tensor in net.state_dict().items()
    )


def _run_epoch(net, loader, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_acc)``.

    With an ``optimizer`` the network is put in train mode and updated after
    every batch; without one the pass is evaluation-only and gradients are
    disabled. Both values are averaged over the number of batches.
    """
    training = optimizer is not None
    net.train(training)
    loss_sum = 0.0
    acc_sum = 0.0
    with torch.set_grad_enabled(training):
        for batch in loader:
            input_ids, attention_mask, labels = [elem.to(device) for elem in batch]
            if training:
                # Clear gradients left over from the previous step.
                optimizer.zero_grad()
            out = net(input_ids, attention_mask)
            loss = criterion(out.squeeze(-1), labels.float())
            if training:
                # Back-propagate and update the parameters.
                loss.backward()
                optimizer.step()
            loss_sum += loss.item()
            acc_sum += get_accuracy(
                out.detach().cpu().numpy(),
                labels.detach().cpu().numpy(),
            )
    return loss_sum / len(loader), acc_sum / len(loader)


start_time = time.time()
combs_best_acc = float("-inf")
combs_best_sd = None
combs_best_loss = None
n_combs = len(table)

# Every combination must start from the same weights; otherwise each run
# would continue training the model left behind by the previous combination
# and the per-combination scores would not be comparable.
initial_state = _snapshot_state(model)

print(f"all {n_combs} combs:")

for comb in range(n_combs):

    BATCH_SIZE = int(table.loc[comb,'BATCH_SIZE'])
    MAX_LEN =  int(table.loc[comb,'MAX_LEN'])
    LEARNING_RATE = table.loc[comb,'LEARNING_RATE']
    EPOCHS =  int(table.loc[comb,'EPOCHS'])

    print(f"Comb{comb+1} starting: BATCH_SIZE: {BATCH_SIZE} MAX_LEN: {MAX_LEN} LEARNING_RATE: {LEARNING_RATE} EPOCHS: {EPOCHS}")
    combs_start_time = time.time()

    # Restore the starting weights for this combination.
    model.load_state_dict(initial_state)

    # Load the data (re-built per combination because MAX_LEN may change).
    train_set = BertDataset(f'../Data/{dataName}/train.csv', maxlen = MAX_LEN )
    val_set = BertDataset(f'../Data/{dataName}/val.csv', maxlen = MAX_LEN )
    train_loader = DataLoader(train_set, batch_size = BATCH_SIZE,shuffle=True,num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_set, batch_size = BATCH_SIZE,shuffle=True,num_workers=NUM_WORKERS)
    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr = LEARNING_RATE)

    # Start below any reachable accuracy so the first epoch always records a
    # checkpoint and the names below are never left unbound.
    best_val_acc = float("-inf")
    best_val_loss = float("nan")
    best_state_dict = None

    for epoch in range(EPOCHS):

        epoch_start_time = time.time()
        train_loss, train_acc = _run_epoch(model, train_loader, optimizer)
        print(f"C{comb+1}/C{n_combs}: train {epoch+1}/{EPOCHS} epochs Loss: {train_loss:3.6f}, Acc: {train_acc:3.6f}, times: {(time.time()-epoch_start_time):.2f}s")

        epoch_start_time = time.time()
        val_loss, val_acc = _run_epoch(model, val_loader)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # Clone the weights: a bare state_dict() would keep changing as
            # training continues and would end up holding the last epoch.
            best_state_dict = _snapshot_state(model)
            best_val_loss = val_loss

        print(f"C{comb+1}/C{n_combs}: val   {epoch+1}/{EPOCHS} epochs Loss: {val_loss:3.6f}, Acc: {val_acc:3.6f}, Best_Acc: {best_val_acc:3.6f}, times: {(time.time()-epoch_start_time):.2f}s")

    # Record the best validation result of this combination in the grid.
    table.loc[comb,'LOSS'] = round(best_val_loss,6)
    table.loc[comb,'BEST_ACC'] = round(best_val_acc,6)

    if best_val_acc > combs_best_acc:
        combs_best_acc = best_val_acc
        combs_best_sd = best_state_dict
        combs_best_loss = best_val_loss

    print(f"C{comb+1}/C{n_combs}: Best_Acc: {best_val_acc:3.6f}, combs_Best_Acc: {combs_best_acc:3.6f}, times: {(time.time()-combs_start_time):.2f}s")

print(f"{n_combs} Combs Best accuracy: {combs_best_acc:3.6f}, All times: {(time.time()-start_time):.2f}s")

# Persist the per-combination results first so they survive even when no
# checkpoint could be produced.
table.to_csv(f"{dataName}-combs.csv",index=False)
if combs_best_sd is None:
    raise RuntimeError("no hyper-parameter combination produced a checkpoint; nothing to save")
torch.save(combs_best_sd, model_dict_name)

In [None]:
# Drop the training-time model and loaders; the following cells rebuild what
# they need from the saved checkpoint and the test split.
del model, train_loader, val_loader

In [None]:
# Row label of the combination with the highest validation accuracy.
# idxmax() returns the index *label*, which is what .loc expects; argmax()
# returns a position and only coincides with the label on a default index.
index = table['BEST_ACC'].idxmax()
# Re-use that combination's sequence length and batch size for the test run.
MAX_LEN = int(table.loc[index,'MAX_LEN'])
BATCH_SIZE = int(table.loc[index,'BATCH_SIZE'])

In [None]:
# Empty results frame with one row per dataset name and the two metric
# columns that the test-evaluation cell fills in.
names_list = [dataName]
result_df = pd.DataFrame(index=names_list, columns=['Loss', 'Acc'])

In [None]:
# Test split, tokenised with the sequence length of the best combination.
test_csv_path = f'../Data/{dataName}/test.csv'
test_set = BertDataset(test_csv_path, maxlen=MAX_LEN)
test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Rebuild the classifier and restore the best checkpoint of the search.
model = SentimentClassifier()
# map_location="cpu" lets a checkpoint saved from GPU tensors load on any
# machine; the model is moved to the target device right after.
model.load_state_dict(torch.load(model_dict_name, map_location="cpu"))
model.to(device)
model.eval()

test_acc = 0
test_loss = 0

# Evaluation only: no gradients are needed.
with torch.no_grad():
    for j,batch in enumerate(test_loader):
        test_input_ids,test_attention_mask,test_labels=[elem.to(device) for elem in batch]
        pred=model(test_input_ids,test_attention_mask)
        loss=criterion(pred.squeeze(-1),test_labels.float())
        pred=pred.detach().cpu().numpy()
        test_labels=test_labels.detach().cpu().numpy()
        test_acc += get_accuracy(pred,test_labels)
        test_loss += loss.item()

# Both metrics are averaged over the number of batches.
test_acc /= len(test_loader)
test_loss /= len(test_loader)

# The Loss field previously printed test_acc by mistake.
print(f'on {dataName} test set: Loss: {test_loss:3.6f} , Acc: {test_acc:3.6f}')

result_df.loc[dataName] = [round(test_loss,6),round(test_acc,6)]
result_df.to_csv(f"{dataName}-result.csv")

In [None]:
# Merge this run's grid results into the running history file, averaging the
# metrics of rows that share the same hyper-parameter combination.
hyperParamslist = ['BATCH_SIZE','MAX_LEN','LEARNING_RATE',"EPOCHS"]
history_path = f"{dataName}-combsAll.csv"

# The file is written below with index=False, so it must be read back without
# index_col; treating the first column as the index would swallow BATCH_SIZE
# and drop every earlier row from the groupby.
try:
    previous_runs = pd.read_csv(history_path)
except FileNotFoundError:
    # First run for this dataset: there is no history yet.
    previous_runs = None

frames = [table] if previous_runs is None else [previous_runs, table]
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
all_table = (
    pd.concat(frames, ignore_index=True)
    .groupby(hyperParamslist)
    .mean()
    .reset_index()
    .sort_values(hyperParamslist)
)
all_table.to_csv(history_path,index=False)

In [None]:
# Send a push notification that the notebook has finished.
notebookName = f"Bert-{dataName}"
context = f"notebook {notebookName} :  is completed"
try:
    # A timeout keeps an unreachable service from hanging the final cell.
    response = requests.get(
        f"https://api.day.app/{apiKey}/{title}/{context}?sound=anticipate",
        timeout=10,
    )
    response.raise_for_status()
except requests.RequestException as exc:
    # The notification is best-effort; a failure here must not mark an
    # otherwise successful run as failed.
    print(f"completion notification failed: {exc}")