In [1]:
import json
import time
import pickle
from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Dict

import torch
from tqdm import trange

from intent_dataset import SeqClsDataset
from utils import Vocab
from torch.utils.data import DataLoader
from intent_model import SeqClassifier
from transformers import AutoModelForSequenceClassification

TRAIN = "train"
DEV = "eval"
SPLITS = [TRAIN, DEV]
device = torch.device("cuda:1") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
data_dir = "./data/intent/"
cache_dir = "./cache/intent/"
ckpt_dir = "./ckpt/intent/"
max_len = 28

with open(Path(cache_dir + "vocab.pkl"), "rb") as f:
    vocab: Vocab = pickle.load(f)

intent_idx_path = Path(cache_dir + "intent2idx.json")
intent2idx: Dict[str, int] = json.loads(intent_idx_path.read_text())

data_paths = {split: Path(data_dir + "%s.json" %split) for split in SPLITS}
data = {split: json.loads(path.read_text()) for split, path in data_paths.items()}
datasets: Dict[str, SeqClsDataset] = {
    split: SeqClsDataset(split_data, vocab, intent2idx, max_len)
    for split, split_data in data.items()
}

    
batch_size = 256
train_loader = DataLoader(datasets["train"], batch_size=batch_size, shuffle=False, collate_fn=datasets["train"].collate_fn)
val_loader = DataLoader(datasets["eval"], batch_size=batch_size, shuffle=False, collate_fn=datasets["eval"].collate_fn)

In [3]:
embeddings = torch.load(cache_dir + "embeddings.pt")

model_checkpoint = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=150)
# model = SeqClassifier(embeddings=embeddings, hidden_size=256, num_layers=2, dropout=0.3, bidirectional=True, num_class=150)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()
epochs = 100
best_acc = 0.9
# epoch_pbar = trange(epochs, desc="Epoch")
his_trian_loss = []
his_val_loss = []
for epoch in range(epochs):
    epoch_start_time = time.time()
    
    train_loss = 0
    train_acc = 0
    train_len = 0
    val_loss = 0
    val_acc = 0
    val_len = 0
    
    model.train()
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()
        data = batch[0].to(device)
        label = batch[1].to(device)
        pred = model(data)
#         loss = criterion(pred, label)
        loss = pred[0]
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += (pred.argmax(dim=1) == label).sum().item()/len(data)
    
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            data = batch[0].to(device)
            label = batch[1].to(device)
            pred = model(data)
            loss = criterion(pred, label)
            val_loss += loss.item()
            val_acc += (pred.argmax(dim=1) == label).sum().item()/len(data)
            
    his_trian_loss.append(train_loss/train_loader.__len__())
    his_val_loss.append(val_loss/val_loader.__len__())
        
    print('[%03d/%03d] %2.2f sec(s) Train Loss: %.4f Acc: %.4f| Val loss: %.4f Acc: %.4f' % \
            (epoch + 1, epochs, time.time()-epoch_start_time, \
             train_loss/train_loader.__len__(), train_acc/train_loader.__len__(), \
             val_loss/val_loader.__len__(), val_acc/val_loader.__len__()))
    
    if val_acc/val_loader.__len__() >= best_acc:
        best_acc = val_acc/val_loader.__len__()
        torch.save(model.state_dict(), "/data/NFS/andy/course/ADL/hw1/intent_weights2.pt")
        print("saving model with acc:%.4f" %(best_acc))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

RuntimeError: grad can be implicitly created only for scalar outputs

In [None]:
import matplotlib.pyplot as plt

def plot(train_metirc, val_metric, metric_name, loss=False):
    plt.plot(train_metirc, label='train_%s' %metric_name)
    plt.plot(val_metric, label="val_%s" %metric_name)
    plt.xlabel('epochs')
    plt.ylabel(metric_name)
    if loss:
        plt.legend(loc='upper right')
    else:
        plt.legend(loc='lower right')
    plt.show()

In [None]:
plot(his_trian_loss, his_val_loss, "loss")

In [None]:
# max_len = 0
# for i in range(len(datasets["train"].data)):
#     sentence = datasets["train"].data[i]["text"]
#     words = sentence.split(" ")
#     if len(words) > max_len:
#         max_len = len(words)
# print(max_len)