In [1]:
from src.model import EpochLogger, MakeEmbed
from src.dataset import MakeDataset
from torch.utils.data import DataLoader

# Build the embedding helper; load_word2vec() presumably populates
# embed.word2vec, which a later cell reads -- confirm in src.model.
embed = MakeEmbed()
embed.load_word2vec()

batch_size = 128
dataset = MakeDataset()
# make_ood_dataset returns a (train, test) pair built with the embedding helper.
ood_train_dataset, ood_test_dataset = dataset.make_ood_dataset(embed)

# Only the training batches are shuffled. Evaluation gains nothing from
# shuffling, and a fixed order keeps the composition of every evaluation batch
# identical from one pass to the next, so batch-level metrics are reproducible.
train_dataloader = DataLoader(ood_train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(ood_test_dataset, batch_size=batch_size, shuffle=False)

In [2]:
import torch
from src.model import DAN
# Wrap the word2vec vectors in a float tensor to hand to the classifier.
weights = torch.FloatTensor(embed.word2vec.wv.vectors)

# The positional arguments are forwarded as-is; their meaning is defined by
# DAN's signature in src.model, which is not visible from this notebook.
dan_model = DAN(weights, 256, 0.5, 2)
optimizer = torch.optim.Adam(params=dan_model.parameters(), lr=1e-3)


In [3]:
from tqdm import tqdm
from tqdm import trange
import os
import torch.nn.functional as F

# Settings consumed by the training loop further down in this cell.
epoch = 100  # number of train + validation passes the loop performs
prev_acc = 0  # best validation accuracy so far; a checkpoint is written only when it is exceeded
save_dir = "./nlp/pretrained/"  # directory that receives the checkpoint files
save_prefix = "ood_clsf"  # leading part of every checkpoint file name

def save(model, save_dir, save_prefix, epoch):
    """Write ``model.state_dict()`` to ``<save_dir>/<save_prefix>_steps_<epoch>.pt``.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        save_dir: Target directory; created, including parents, when missing.
            An empty string means the current working directory.
        save_prefix: Leading part of the checkpoint file name.
        epoch: Value formatted into the ``_steps_`` suffix of the file name.
    """
    # exist_ok=True removes the check-then-create race of a separate isdir()
    # test; the guard avoids calling makedirs("") which raises.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    save_path = '{}_steps_{}.pt'.format(os.path.join(save_dir, save_prefix), epoch)
    torch.save(model.state_dict(), save_path)

def _count_correct(logit, target):
    """Return, as a Python int, how many rows of `logit` have arg-max equal to `target`."""
    return (logit.argmax(dim=1).view(target.size()) == target).sum().item()


# Each pass trains on every training batch, then scores the model on the test
# loader and writes a checkpoint whenever the validation accuracy improves.
for i in range(epoch):
    dan_model.train()  # switch the model to training mode

    # The description is constant for the whole pass, so it is set once here.
    with tqdm(train_dataloader, unit="batch", desc=f"Epoch {i}") as tepoch:  # progress bar
        for data in tepoch:
            x = data[0]
            target = data[1]
            # Call the module rather than .forward() so registered hooks run.
            logit = dan_model(x)

            optimizer.zero_grad()
            loss = F.cross_entropy(logit, target)
            loss.backward()
            optimizer.step()

            # Plain Python numbers keep the progress bar independent of the
            # device the tensors live on.
            accuracy = 100.0 * _count_correct(logit, target) / x.size(0)
            tepoch.set_postfix(loss=loss.item(), accuracy=accuracy)

    dan_model.eval()  # switch the model to evaluation mode
    correct_total = 0
    sample_total = 0
    # no_grad: nothing is back-propagated here, so skip building the autograd graph.
    with torch.no_grad(), tqdm(test_dataloader, unit="batch", desc=f"Epoch {i}") as tepoch:
        for data in tepoch:
            x = data[0]
            target = data[1]

            logit = dan_model(x)
            loss = F.cross_entropy(logit, target)

            # Accumulate raw counts so a smaller final batch is not over-weighted,
            # as it would be when averaging per-batch percentages.
            correct_total += _count_correct(logit, target)
            sample_total += x.size(0)

            tepoch.set_postfix(loss=loss.item(), accuracy=100.0 * correct_total / sample_total)

    # Validation accuracy for this pass; an empty loader scores 0.0 instead of
    # raising ZeroDivisionError. Save only when the previous best is beaten.
    acc = 100.0 * correct_total / sample_total if sample_total else 0.0
    if acc > prev_acc:
        prev_acc = acc
        save(dan_model, save_dir, save_prefix + "_" + str(round(acc, 3)), i)

Epoch 0: 100%|████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:01<00:00, 89.02batch/s, accuracy=84.0, loss=0.317]
Epoch 0: 100%|███████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 201.76batch/s, accuracy=96.6, loss=0.263]
Epoch 1: 100%|███████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 92.31batch/s, accuracy=98.4, loss=0.0562]
Epoch 1: 100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 196.59batch/s, accuracy=99.4, loss=0.0747]
Epoch 2: 100%|███████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 91.93batch/s, accuracy=99.2, loss=0.0427]
Epoch 2: 100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 148.39batch/s, accuracy=99.4, loss=0.0394]
Epoch 3: 100%|████████████████████

KeyboardInterrupt: 