In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import random
from tqdm import tqdm
from tensorboardX import SummaryWriter
import numpy as np

In [2]:
kor_vocab = torch.load('../datasets/korNeng_corpus/kor_vocab.pt')
eng_vocab = torch.load('../datasets/korNeng_corpus/eng_vocab.pt')

In [3]:
train_data = torch.load('../datasets/korNeng_corpus/train_tensor.pt')
valid_data = torch.load('../datasets/korNeng_corpus/valid_tensor.pt')
test_data = torch.load('../datasets/korNeng_corpus/test_tensor.pt')

In [10]:
from rnnsearch import RNNsearch

n_hiddens = 1000 #1000
n_inputs = len(kor_vocab)
n_outputs = len(eng_vocab)
n_embeddings = 620 #620
n_maxout = 500 # 500


model = RNNsearch(n_inputs, n_outputs, n_embeddings, n_hiddens, n_hiddens, 'cuda')
model.to('cuda')
optimizer = optim.Adadelta(model.parameters(), eps=1e-6, rho=0.95)
#optimizer = optim.Adam(model.parameters())

In [11]:
seed = 1234
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [12]:
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

BATCH_SIZE = 8
PAD_IDX = kor_vocab['<pad>']
BOS_IDX = kor_vocab['<bos>']
EOS_IDX = kor_vocab['<eos>']
device = torch.device('cuda')

def generate_batch(data_batch):
    kor_batch, eng_batch = [], []
    for (kor_item, eng_item) in data_batch:
        kor_batch.append(torch.cat([torch.tensor([BOS_IDX]), kor_item, torch.tensor([EOS_IDX])], dim=0))
        eng_batch.append(torch.cat([torch.tensor([BOS_IDX]), eng_item, torch.tensor([EOS_IDX])], dim=0))
    kor_batch = pad_sequence(kor_batch, padding_value=PAD_IDX)
    eng_batch = pad_sequence(eng_batch, padding_value=PAD_IDX)
    return kor_batch, eng_batch

train_iter = DataLoader(train_data, batch_size=BATCH_SIZE,
                        shuffle=True, collate_fn=generate_batch, num_workers=0)
valid_iter = DataLoader(valid_data, batch_size=BATCH_SIZE,
                        shuffle=True, collate_fn=generate_batch, num_workers=0)
test_iter = DataLoader(test_data, batch_size=BATCH_SIZE,
                       shuffle=True, collate_fn=generate_batch, num_workers=0)

In [13]:
PAD_IDX = eng_vocab['<pad>']

criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

In [14]:
accumulation_step = 16

In [15]:
import math
import time

writer = SummaryWriter('attention_logs')
step = 1
def train(model: nn.Module,
          iterator: torch.utils.data.DataLoader,
          optimizer: optim.Optimizer,
          criterion: nn.Module,
          clip: float):
    
    global step

    model.train()

    epoch_loss = 0

    for _, (src, trg) in tqdm(enumerate(iterator)):
        src, trg = src.to(device), trg.to(device)
        
        output = model(src, trg, 0.5)

        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)

        loss = criterion(output, trg)
        loss.backward()

        if (_ + 1) % accumulation_step == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)          
            optimizer.step()
            optimizer.zero_grad()
        writer.add_scalar('attention_log/train_loss', loss.item(), step)
        step += 1

        epoch_loss += loss.item()
        

    return epoch_loss / len(iterator)


def evaluate(model: nn.Module,
             iterator: torch.utils.data.DataLoader,
             criterion: nn.Module):

    model.eval()

    epoch_loss = 0

    with torch.no_grad():

        for _, (src, trg) in enumerate(iterator):
            src, trg = src.to(device), trg.to(device)

            output = model(src, trg, 0) #turn off teacher forcing

            output = output[1:].view(-1, output.shape[-1])
            trg = trg[1:].view(-1)

            loss = criterion(output, trg)

            epoch_loss += loss.item()

    return epoch_loss / len(iterator)


def epoch_time(start_time: int,
               end_time: int):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


N_EPOCHS = 1 # 10
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_iter, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iter, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

test_loss = evaluate(model, test_iter, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

140212it [9:44:15,  4.00it/s]


Epoch: 01 | Time: 629m 59s
	Train Loss: 4.567 | Train PPL:  96.287
	 Val. Loss: 5.574 |  Val. PPL: 263.474
| Test Loss: 5.570 | Test PPL: 262.331 |


In [16]:
torch.save(model, 'rnnserach.pt')