In [58]:
import random
from typing import List, Set, Dict, Tuple

import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch import optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader

In [59]:
# Locations of the training split; each file is read with open_process(),
# which parses every line as whitespace-separated integers.
train_source_loc = 'naver/train_source.txt'
train_target_loc = 'naver/train_target.txt'

# Locations of the test split (same on-disk format as the training files).
test_source_loc = 'naver/test_source.txt'
test_target_loc = 'naver/test_target.txt'

def open_process(file_loc):
    """Read a file of integer sequences, one sequence per line.

    Args:
        file_loc: Path of a text file whose lines hold whitespace-separated
            integers.

    Returns:
        A list with one entry per line; each entry is the list of ints parsed
        from that line (an empty list for a blank line).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token on any line is not a valid integer.
    """
    # The context manager closes the handle even when parsing raises.
    with open(file_loc, 'r') as file:
        return [[int(token) for token in line.split()] for line in file]

# Reserved token ids. Ids 0 and 1 are kept free for SOS/EOS, which is why the
# vocabulary indices built below start at 2.
SOS_token = 0
EOS_token = 1
# NOTE(review): -1 equals the padding value hard-coded in pad_collate, but a
# negative id cannot be looked up in nn.Embedding -- confirm padded positions
# are masked or remapped before they reach the model.
PAD_token = -1

# Raw integer sequences, one list per line of each file.
train_source = open_process(train_source_loc)
train_target = open_process(train_target_loc)

test_source = open_process(test_source_loc)
test_target = open_process(test_target_loc)

# Distinct raw token values seen in each file.
train_src_vocab = {token for line in train_source for token in line}
train_tgt_vocab = {token for line in train_target for token in line}

test_src_vocab = {token for line in test_source for token in line}
test_tgt_vocab = {token for line in test_target for token in line}

# Number of ids reserved ahead of the vocabulary (SOS and EOS).
_NUM_RESERVED_IDS = 2


def _build_vocab_index(vocab):
    """Map every raw token value in `vocab` to a dense index starting at 2.

    The 'SOS_token' / 'EOS_token' marker entries under keys 0 and 1 are kept
    for compatibility, but they are only inserted when those keys are free.
    Assigning them unconditionally (as before) replaced the integer index of a
    raw token 0 or 1 with a string, which corrupts every sequence that
    contains such a token once it is encoded through this table.
    """
    index = {value: position + _NUM_RESERVED_IDS
             for position, value in enumerate(sorted(vocab))}
    index.setdefault(0, 'SOS_token')
    index.setdefault(1, 'EOS_token')
    return index


train_src_vocab_dict = _build_vocab_index(train_src_vocab)
train_tgt_vocab_dict = _build_vocab_index(train_tgt_vocab)

test_src_vocab_dict = _build_vocab_index(test_src_vocab)
# The marker entries were previously written to train_tgt_vocab_dict a second
# time here (copy-paste slip), leaving the test target table without them.
test_tgt_vocab_dict = _build_vocab_index(test_tgt_vocab)

# Sizes include the reserved ids: indices run from 2 to len(vocab) + 1, so an
# embedding / output layer needs len(vocab) + 2 rows to hold the largest one.
train_src_vocab_size = len(train_src_vocab) + _NUM_RESERVED_IDS
train_tgt_vocab_size = len(train_tgt_vocab) + _NUM_RESERVED_IDS

test_src_vocab_size = len(test_src_vocab) + _NUM_RESERVED_IDS
test_tgt_vocab_size = len(test_tgt_vocab) + _NUM_RESERVED_IDS

In [63]:
def _length_summary(sequences):
    """Return (sequence count, truncated mean length, (longest, shortest))."""
    lengths = [len(seq) for seq in sequences]
    return len(sequences), int(np.mean(lengths)), (max(lengths), min(lengths))


# Training source sequences.
count, mean_len, extremes = _length_summary(train_source)
print(f'train source length: {count}')
print(f'mean length: {mean_len}')
print(f'max / min length: {extremes}')
print(f'vocab size: {len(train_src_vocab)}')
print('='*30)

# Training target sequences.
count, mean_len, extremes = _length_summary(train_target)
print(f'train target length: {count}')
print(f'mean length: {mean_len}')
print(f'max / min length: {extremes}')
print(f'vocab size: {len(train_tgt_vocab)}')

# Test source sequences.
count, mean_len, extremes = _length_summary(test_source)
print(f'test source length: {count}')
print(f'mean length: {mean_len}')
print(f'max / min length: {extremes}')
print(f'vocab size: {len(test_src_vocab)}')

# Test target sequences.
count, mean_len, extremes = _length_summary(test_target)
print(f'test target length: {count}')
print(f'mean length: {mean_len}')
print(f'max / min length: {extremes}')
print(f'vocab size: {len(test_tgt_vocab)}')

train source length: 7260
mean length: 18
max / min length: (81, 2)
vocab size: 53
train target length: 7260
mean length: 10
max / min length: (54, 1)
vocab size: 595
test source length: 2000
mean length: 19
max / min length: (84, 1)
vocab size: 45
test target length: 2000
mean length: 10
max / min length: (54, 1)
vocab size: 495


In [64]:
def _remap(sequences, table):
    """Translate every token of every sequence through the lookup `table`."""
    return [[table[token] for token in seq] for seq in sequences]


# NOTE(review): this cell rebinds the raw sequences to their encoded form, so
# running it a second time would push already-encoded ids through the tables
# again -- re-run the loading cell first.

# Training sources get EOS appended; training targets get SOS prepended.
train_source = [encoded + [EOS_token] for encoded in _remap(train_source, train_src_vocab_dict)]
train_target = [[SOS_token] + encoded for encoded in _remap(train_target, train_tgt_vocab_dict)]

# NOTE(review): the test split is encoded with tables built from the test
# files themselves, so the same raw token may receive a different id than in
# training, and no SOS/EOS is added here -- confirm this is intended.
test_source = _remap(test_source, test_src_vocab_dict)
test_target = _remap(test_target, test_tgt_vocab_dict)

In [65]:
# Default for train()'s max_length parameter. 81 is the longest raw training
# source sequence reported by the statistics cell.
# NOTE(review): the encoding cell appends EOS to every training source, so an
# encoded source can be 82 tokens long -- confirm whether this bound should
# account for that.
MAX_LENGTH = 81

In [66]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH, teacher_forcing_ratio=None):
    """Run one optimisation step on a single (source, target) pair.

    The source is fed to the encoder one token at a time; the final encoder
    hidden state seeds the decoder, which is then unrolled over the target
    while the loss is accumulated, back-propagated and applied through both
    optimizers.

    Args:
        input_tensor: Source token ids, indexed along dim 0 (one encoder step
            per index). Assumed to be shaped (length, 1) -- TODO confirm
            against the code that builds the pairs.
        target_tensor: Target token ids, indexed along dim 0; each
            target_tensor[i] is passed to `criterion` as the class target.
        encoder: Module called as encoder(token, hidden) -> (output, hidden)
            and exposing initHidden().
        decoder: Module called as decoder(token, hidden) -> (output, hidden),
            i.e. the DecoderRNN interface defined in this notebook.
        encoder_optimizer: Optimizer over the encoder parameters.
        decoder_optimizer: Optimizer over the decoder parameters.
        criterion: Loss called as criterion(decoder_output, target_token).
        max_length: Unused. Kept so existing callers keep working; it sized
            an encoder-output buffer that only an attention decoder would
            read.
        teacher_forcing_ratio: Probability of feeding the ground-truth token
            (instead of the decoder's own prediction) as the next decoder
            input. When None, a module-level `teacher_forcing_ratio` is used
            if one exists, otherwise 0.5.

    Returns:
        The accumulated loss divided by the target length, as a float.
    """
    if teacher_forcing_ratio is None:
        # Honour a notebook-level setting when present; none is defined in the
        # visible cells, so fall back to a fixed default instead of failing.
        teacher_forcing_ratio = globals().get('teacher_forcing_ratio', 0.5)

    # The hidden state must live on the same device as the inputs, whatever
    # device initHidden() happened to allocate it on.
    encoder_hidden = encoder.initHidden().to(input_tensor.device)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Only the final hidden state is needed: the decoder used here has no
    # attention and therefore never reads the per-step encoder outputs.
    for index in range(input_length):
        _, encoder_hidden = encoder(input_tensor[index], encoder_hidden)

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        # DecoderRNN.forward takes (input, hidden) and returns (output, hidden).
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: the ground-truth token is the next input.
            decoder_input = target_tensor[di]
        else:
            # No teacher forcing: feed back the decoder's own best guess,
            # detached so gradients do not flow through the chosen token.
            _, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [67]:
import time
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train the encoder/decoder pair for `n_iters` single-pair steps.

    Every step calls train() on one randomly chosen pair. The running loss is
    averaged and printed every `print_every` steps, and averaged and recorded
    every `plot_every` steps; the recorded averages are passed to showPlot()
    once training ends.

    NOTE(review): `pairs`, `tensorsFromPair` and `timeSince` are not defined
    in the cells visible here and must be provided elsewhere in the notebook.

    Args:
        encoder: Encoder module; its parameters are optimised with SGD.
        decoder: Decoder module; its parameters are optimised with SGD.
        n_iters: Number of training steps (one sampled pair per step).
        print_every: Interval, in steps, between progress printouts.
        plot_every: Interval, in steps, between recorded loss averages.
        learning_rate: Learning rate for both SGD optimizers.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # reset every print_every steps
    plot_loss_total = 0  # reset every plot_every steps

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    # Sample all training pairs up front, with replacement.
    training_pairs = [tensorsFromPair(random.choice(pairs)) for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step, pair in enumerate(training_pairs, start=1):
        loss = train(pair[0], pair[1], encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            progress = step / n_iters
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, progress),
                                         step, progress * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_losses.append(plot_loss_total / plot_every)
            plot_loss_total = 0

    showPlot(plot_losses)
    
import matplotlib.pyplot as plt
# Select the non-interactive 'agg' backend.
# NOTE(review): with this backend selected, figures may not be displayed
# inline in the notebook -- confirm this is intended.
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    """Plot a sequence of loss values as a line chart.

    Args:
        points: Values to plot on the y-axis, in order; the x-axis is their
            position in the sequence.
    """
    # plt.subplots() already creates the figure; the extra plt.figure() call
    # that used to precede it only left an empty, never-closed figure behind.
    fig, ax = plt.subplots()
    # Put a major y-axis tick at every multiple of 0.2.
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)

In [68]:
# True when PyTorch can see a CUDA-capable GPU, False otherwise.
is_cuda = torch.cuda.is_available()

# Shared device handle used by later cells: the GPU when one is available,
# the CPU otherwise.
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU is available


In [69]:
# Width shared by the embeddings and GRU states of both networks.
hidden_size = 256

# Build both networks, then move each one to the selected device.
encoder = EncoderRNN(train_src_vocab_size, hidden_size)
encoder = encoder.to(device)
decoder = DecoderRNN(hidden_size, train_tgt_vocab_size)
decoder = decoder.to(device)

# 75000 single-pair training steps, reporting progress every 5000.
trainIters(encoder, decoder, n_iters=75000, print_every=5000)

NameError: name 'tensorsFromPair' is not defined

In [7]:
class SeqDataset(Dataset):
    """Dataset of paired source/target token-id sequences.

    Item `idx` is the pair (src_seq[idx], tgt_seq[idx]) converted to 1-D
    integer tensors. The dataset length is the number of source sequences.
    """

    def __init__(self, src_seq: List, tgt_seq: List):
        """Store both sequence lists and record the longest length of each.

        Args:
            src_seq: List of source sequences (lists of token ids).
            tgt_seq: List of target sequences, aligned by position with
                `src_seq`.
        """
        self.src_seq = src_seq
        self.tgt_seq = tgt_seq

        # default=0 keeps construction working for an empty dataset, where
        # max() over an empty list would otherwise raise ValueError.
        self.src_maxlen = max(map(len, self.src_seq), default=0)
        self.tgt_maxlen = max(map(len, self.tgt_seq), default=0)

    def __len__(self) -> int:
        """Return the number of source sequences."""
        return len(self.src_seq)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the (source, target) pair at `idx` as integer tensors."""
        # An explicit dtype keeps empty sequences integer-typed; without it
        # torch.tensor([]) falls back to float32.
        src = torch.tensor(self.src_seq[idx], dtype=torch.long)
        tgt = torch.tensor(self.tgt_seq[idx], dtype=torch.long)

        return src, tgt
    
def pad_collate(batch, padding_value: int = -1) -> Tuple[torch.Tensor, torch.Tensor]:
    """Collate (source, target) tensor pairs into two padded batch tensors.

    Args:
        batch: Iterable of (source, target) pairs of 1-D tensors, e.g. the
            items produced by SeqDataset.
        padding_value: Value written into the positions past the end of each
            shorter sequence. Defaults to -1, the value that was previously
            hard-coded.

    Returns:
        (x_pad, y_pad): the sources and the targets, each stacked batch-first
        and padded to the longest sequence of its own group.
    """
    xs, ys = zip(*batch)

    x_pad = pad_sequence(xs, batch_first=True, padding_value=padding_value)
    y_pad = pad_sequence(ys, batch_first=True, padding_value=padding_value)

    return x_pad, y_pad

In [52]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder that consumes one token per forward call."""

    def __init__(self, input_size, hidden_size):
        """Create the embedding table and the GRU.

        Args:
            input_size: Number of rows of the embedding table (every token id
                fed to the encoder must be smaller than this).
            hidden_size: Size of both the embedding vectors and the GRU state.
        """
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input_seq, hidden):
        """Encode one token.

        Args:
            input_seq: Tensor holding a single token id.
            hidden: Previous GRU state, shaped (1, 1, hidden_size).

        Returns:
            (output, hidden): the GRU output and the new state, both shaped
            (1, 1, hidden_size).
        """
        # Reshape to (seq_len=1, batch=1, hidden_size) as the GRU expects.
        embedded = self.embedding(input_seq).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return a zero initial state on the same device as the parameters."""
        # Allocating on the parameters' device keeps the state usable after
        # the module has been moved with .to(...); a plain torch.zeros() would
        # stay on the CPU and fail against a CUDA model.
        return torch.zeros(1, 1, self.hidden_size,
                           device=self.embedding.weight.device)
    
class DecoderRNN(nn.Module):
    """Single-layer GRU decoder that emits log-probabilities one token at a time."""

    def __init__(self, hidden_size, output_size):
        """Create the embedding, GRU, output projection and log-softmax.

        Args:
            hidden_size: Size of the embedding vectors and of the GRU state.
            output_size: Number of embedding rows and of output classes.
        """
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_seq, hidden):
        """Decode one step.

        Args:
            input_seq: Tensor holding a single token id.
            hidden: Previous GRU state, shaped (1, 1, hidden_size).

        Returns:
            (log_probs, hidden): log-probabilities over the output classes,
            shaped (1, output_size), and the new GRU state.
        """
        # Embed the token as (seq_len=1, batch=1, hidden_size), then apply ReLU.
        embedded = self.embedding(input_seq).view(1, 1, -1)
        activated = F.relu(embedded)
        gru_out, hidden = self.gru(activated, hidden)
        # Drop the sequence dimension before projecting to class scores.
        scores = self.out(gru_out[0])
        return self.softmax(scores), hidden

    def initHidden(self):
        """Return a zero initial state on the notebook-level `device`."""
        state_shape = (1, 1, self.hidden_size)
        return torch.zeros(*state_shape, device=device)