In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel
import torch.multiprocessing as mp

import pandas as pd
import time
import datetime
import os

from transformers import BertForSequenceClassification, BertTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup

# Suppress the Hugging Face tokenizers parallelism warning:
# https://stackoverflow.com/questions/62691279/how-to-disable-tokenizers-parallelism-true-false-warning
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# load model and tokenizer
def get_model(model_name="kykim/bert-kor-base"):
    """Load a BERT sequence classifier together with its tokenizer.

    Args:
        model_name: Checkpoint identifier (or local path) handed to
            ``from_pretrained`` for both the model and the tokenizer.
            Defaults to ``"kykim/bert-kor-base"``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Load both from the same checkpoint name so the tokenizer and the model
    # cannot drift apart.
    model = BertForSequenceClassification.from_pretrained(model_name)
    tokenizer = BertTokenizer.from_pretrained(model_name)
    return model, tokenizer

# NSMC dataset class
class NSMCDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with per-example labels.

    Args:
        encodings: Mapping (anything exposing ``.items()``) from feature name
            to a per-example indexable sequence.
        labels: Per-example labels; a plain sequence or a pandas Series.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, including ``'labels'``."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # ``series[idx]`` on a pandas object is a *label* lookup and raises
        # KeyError when the index is not 0..n-1 (e.g. after filtering rows).
        # Use positional access when it is available.
        labels = self.labels
        label = labels.iloc[idx] if hasattr(labels, "iloc") else labels[idx]
        item['labels'] = torch.tensor(label)
        return item

    def __len__(self):
        """Number of examples, taken from the labels container."""
        return len(self.labels)

def load_dataset(tokenizer, data_dir='./nsmc'):
    """Read the NSMC train/test splits and tokenize them into datasets.

    Args:
        tokenizer: Callable invoked on a list of strings with
            ``truncation=True, padding=True``; its result is passed to
            ``NSMCDataset`` as the encodings.
        data_dir: Directory holding ``ratings_train.txt`` and
            ``ratings_test.txt`` (tab-separated files with ``document`` and
            ``label`` columns). Defaults to ``'./nsmc'``.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple of ``NSMCDataset`` objects.
    """

    def _build_split(filename):
        # Read one split, tokenize its documents and wrap them with their labels.
        frame = pd.read_csv(os.path.join(data_dir, filename), sep='\t', encoding='utf-8')
        # Force every cell to str so non-string values cannot reach the
        # tokenizer (presumably empty reviews parsed as NaN; they become the
        # literal text "nan" -- confirm this is the intended handling).
        documents = frame['document'].apply(str)
        encodings = tokenizer(list(documents), truncation=True, padding=True)
        return NSMCDataset(encodings, frame['label'])

    train_dataset = _build_split('ratings_train.txt')
    test_dataset = _build_split('ratings_test.txt')

    return train_dataset, test_dataset

def test(model, test_loader, device):
    model.cuda(device)
    model.eval()
    total = 0
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for inputs in test_loader:
            start = time.time()
            inputs = {k: v.cuda(device) for k, v in inputs.items()}
            outputs = model(**inputs)

            total += inputs['labels'].size(0)
            correct += inputs['labels'].eq(outputs.logits.argmax(axis=1)).sum().item()
            total_loss += outputs.loss.item()

            acc = 100 * correct / total

    return acc, total_loss / len(test_loader)

In [4]:
# Instantiate the pretrained classifier and its tokenizer; the fine-tuned
# weights are loaded into `model` from a saved checkpoint in a later cell.
model, tokenizer = get_model()

Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

In [5]:
# Load the checkpoint onto the CPU: without map_location, torch.load restores
# each tensor to the device it was saved from, which can claim GPU memory
# before the model has been placed anywhere.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
x = torch.load(
    'saved_models/BERT_NSMC_multiGPU_test1/epoch_14_0.5837965384825096_accuracy_91.118.pt',
    map_location='cpu',
)

In [9]:
# The saved keys carry a 7-character "module." prefix (presumably added by the
# DistributedDataParallel wrapper used for training -- confirm). Strip it only
# when it is actually present so already-clean keys are not truncated.
load_state_dict = {
    (k[len("module."):] if k.startswith("module.") else k): v
    for k, v in x['state_dict'].items()
}

In [11]:
# Copy the checkpoint weights into the model; the cell output reports whether
# every key matched.
model.load_state_dict(load_state_dict)

<All keys matched successfully>

In [12]:
# Tokenize both NSMC splits. NOTE(review): only test_dataset is used in the
# cells visible here, so the training split is tokenized without being consumed.
train_dataset, test_dataset = load_dataset(tokenizer)

In [17]:
# Release cached, currently unused GPU memory held by PyTorch's allocator
# before running evaluation.
torch.cuda.empty_cache()

In [18]:
# Evaluation loader: fixed order, 256 examples per batch, page-locked host
# memory for the host-to-GPU copies.
test_loader = DataLoader(
    test_dataset,
    batch_size=256,
    shuffle=False,
    pin_memory=True,
)

In [19]:
# Run evaluation; 7 is the CUDA device index the model and batches are moved
# to (requires a machine exposing at least 8 GPUs).
acc, loss = test(model, test_loader, 7)

In [20]:
# Mean of the per-batch losses over the test loader, as returned by test().
loss

0.5854317802099549

In [21]:
# Test-set accuracy in percent, as returned by test().
acc

91.118