In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel
import torch.multiprocessing as mp

import pandas as pd
import time
import datetime
import os

from transformers import ElectraForSequenceClassification, ElectraTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup

# load model and tokenizer
def get_model():
    model = ElectraForSequenceClassification.from_pretrained("monologg/koelectra-small-discriminator")
    tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator")
    return model, tokenizer

# NSMC dataset class
class NSMCDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

def load_dataset(tokenizer):
    # load nsmc dataset
    nsmc_train = pd.read_csv('../../data/nsmc/ratings_train.txt', sep='\t', encoding='utf-8')
    nsmc_test = pd.read_csv('../../data/nsmc/ratings_train.txt', sep='\t', encoding='utf-8')

    nsmc_train['document'] = nsmc_train['document'].apply(str)
    nsmc_test['document'] = nsmc_test['document'].apply(str)
    # encoding
    # train_encodings = tokenizer(list(map(str, nsmc_train['document'])), truncation=True, padding=True)
    # test_encodings = tokenizer(list(map(str, nsmc_test['document'])), truncation=True, padding=True)
    train_encodings = tokenizer(list(nsmc_train['document']), truncation=True, padding=True)
    test_encodings = tokenizer(list(nsmc_test['document']), truncation=True, padding=True)

    train_dataset = NSMCDataset(train_encodings, nsmc_train['label'])
    test_dataset = NSMCDataset(test_encodings, nsmc_test['label'])

    return train_dataset, test_dataset


In [9]:
model, tokenizer=get_model()

Some weights of the model checkpoint at monologg/koelectra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-discriminator and are newly initialized: ['cl

In [10]:
k = torch.load('./saved_models/epoch_9_0.002_accuracy_93.542.pt')

In [22]:
load_dict = {key[7:]: value for key, value in k['state_dict'].items()}

In [24]:
model.load_state_dict(load_dict)

<All keys matched successfully>

In [25]:
train_dataset, test_dataset = load_dataset(tokenizer)

In [27]:
from ELECTRA_DDP import Trainer

In [28]:
test_loader = DataLoader(test_dataset, batch_size=256)

In [30]:
class a(object):
    def __init__(self):
        self.rank=1

In [32]:
d = a()

In [33]:
d.rank

1

In [57]:
model.to('cuda')

ElectraForSequenceClassification(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(32200, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (embeddings_project): Linear(in_features=128, out_features=256, bias=True)
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=256, out_features=256, bias=True)
              (key): Linear(in_features=256, out_features=256, bias=True)
              (value): Linear(in_features=256, out_features=256, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_

In [60]:
trainer = Trainer(model, 1,1,1,test_loader, 'cuda:0', d)

In [63]:
x = next(iter(test_loader))

In [65]:
device = 'cuda:3'

In [66]:
xx = {k:v.to(device) for k, v in x.items()}

In [67]:
model.to(device)
model.eval()

outputs = model(**xx)

In [74]:
outputs.logits.argmax(axis=1).detach().eq(xx['labels'].detach()).sum().item()

241

In [69]:
xx['labels'].size()

torch.Size([256])

In [72]:
outputs.logits.argmax().size()

torch.Size([])

In [75]:
device = 'cuda:3'
model.eval()
model.to(device)

total = 0
correct = 0
for batch in test_loader:
    inputs = {k:v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(**inputs)
        correct += inputs['labels'].detach().eq(outputs.logits.argmax(axis=1).detach()).sum().item()
        total += inputs['labels'].size(0)

In [76]:
correct, total

(140313, 150000)

In [77]:
correct / total

0.93542

In [61]:
outputs = trainer.test()

In [62]:
outputs

(93.542, tensor(0.0007, device='cuda:0'))