In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random
 
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import sequence

In [2]:
# global 変数
(x_train, y_train), (x_test, y_test) = sequence.load_data()
x_train = torch.tensor(x_train)
x_test = torch.tensor(x_test)
y_train = torch.tensor(y_train)
y_test = torch.tensor(y_test)

char_to_id, id_to_char = sequence.get_vocab()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# hyper-parameter
MAX_INPUT = 7
MAX_OUTPUT = 5
HIDDEN_DIM = 32
EMB_DIM = 32
vocab_size = len(char_to_id)
LEARNING_RATE = 0.001
BATCH_SIZE = 100
EPOCH = 100

In [3]:
class Encoder(nn.Module):
    def __init__(self, vocab_size, hidden_dim, emb_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.emb_dim = emb_dim
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=self.emb_dim) # padding_idx=5 #num_embeddings: inputの系列の長さ
    # 単語の分散表現の初期化
#     self.embedding.weight.data.copy_(torch.from_numpy(pretrained_weight)) #今回はいらない    
        self.gru = nn.GRU(input_size=emb_dim, hidden_size=hidden_dim, batch_first=True)

    def forward(self, indices, batch_size=100):
        # indices = tensor([batch_size, MAX_INPUT(7)])
        embedding = self.embedding(indices)
        assert(indices.size(1) == 7)
        if embedding.dim() == 2:
            embedding = torch.unsqueeze(embedding, 1) #3次元にしている
        _, state = self.gru(embedding, torch.zeros(1, batch_size, self.hidden_dim).to(device)) #最初の入力は0ベクトル
        return state

In [4]:
class Decoder(nn.Module):
    def __init__(self, vocab_size, hidden_dim, emb_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.emb_dim = emb_dim

        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=self.emb_dim)
        self.gru = nn.GRU(input_size=emb_dim, hidden_size=hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, indices, init_hidden):
        embedding = self.embedding(indices)
        if embedding.dim() == 2:
            embedding = torch.unsqueeze(embedding, 1)
        output, state = self.gru(embedding, init_hidden) #最初の入力は0ベクトル
        output = self.linear(output)
        return output, state

In [5]:
class Seq2seq:
    def __init__(self):
        self.encoder = Encoder(vocab_size=vocab_size, hidden_dim=HIDDEN_DIM, emb_dim=EMB_DIM).to(device)
        self.decoder = Decoder(vocab_size=vocab_size, hidden_dim=HIDDEN_DIM, emb_dim=EMB_DIM).to(device)
        self.criterion = nn.CrossEntropyLoss()
    
    def forward(self, encoder_input, decoder_input):
        batch_size = encoder_input.size(0)
        encoder_hidden = self.encoder(encoder_input, batch_size)
        
        source = decoder_input[:, :-1]
        target = decoder_input[:, 1:]
        
        # 1文字ずつ入力し、outputのcross entropyを計算する
        loss = 0
        batch_size = encoder_hidden.size(1)
        target_length = target.size(1)
        source_length = source.size(1)
        decoder_output = np.zeros((batch_size, target_length))
        for i in range(source_length):
            decoder_result, _ = self.decoder(source[:, i], encoder_hidden)
            decoder_result = torch.squeeze(decoder_result)
            decoder_output[:, i] = np.argmax(decoder_result.detach().numpy(), axis=1)
            loss += self.criterion(decoder_result, target[:, i])
        return loss, decoder_output

In [6]:
class Trainer:
    def __init__(self, model):
        self.model = model
        self.encoder_optimizer = optim.Adam(model.encoder.parameters(), lr=LEARNING_RATE)
        self.decoder_optimizer = optim.Adam(model.decoder.parameters(), lr=LEARNING_RATE)
        
    def fit(self, X, Y):
        # 重みの初期化
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()
        
        self.train_loss, self.train_output =  self.model.forward(X, Y)

        # backward
        self.train_loss.backward()
        self.train_loss = self.train_loss.item()

        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
        
    def predict(self):
        return self.train_output
    
    def get_loss(self):
        return self.train_loss

In [7]:
model = Seq2seq()
trainer = Trainer(model)

In [None]:
from sklearn.utils import shuffle
from math import ceil

def train_loader(data, target, batch_size=100):
    input_batchs = []
    output_batchs = []
#     data, target = shuffle(data, target)
    
    batch_num = ceil(len(data) // batch_size)
    for i in range(batch_num):
        if i == batch_num - 1:
            each_input_batchs =  data[i * batch_size:]
            each_output_batchs = target[i * batch_size:]
        else:
            each_input_batchs =  data[i * batch_size: (i + 1) * batch_size]
            each_output_batchs = target[i * batch_size:(i + 1) * batch_size]
        input_batchs.append(each_input_batchs)
        output_batchs.append(each_output_batchs)
    return input_batchs, output_batchs

In [None]:
print("Training..")

train_losses = []
test_losses = []

for e in range(EPOCH):
    train_loss = 0
    input_batchs, output_batchs = train_loader(x_train, y_train)

    for i in range(len(input_batchs)): # mini-batchごとに最適化
        input_batch = input_batchs[i].to(device)
        output_batch = output_batchs[i].to(device)
        
#         # 重みの初期化
#         encoder_optimizer.zero_grad()
#         decoder_optimizer.zero_grad()
        
#         train_loss, train_output =  model.forward(input_batch, output_batch)

#         # backward
#         train_loss.backward()
#         train_loss = train_loss.item()

#         encoder_optimizer.step()
#         decoder_optimizer.step()

    trainer.fit(input_batch, output_batch)
    train_output = trainer.predict()
    train_loss = trainer.get_loss()
    
    train_losses.append(train_loss)
    
    test_loss, test_output = model.forward(x_test, y_test)
        
    test_loss = test_loss.item()
    test_losses.append(test_loss)
    
    print(train_loss, test_loss)

Training..
10.43000602722168 10.346759796142578
10.361082077026367 10.295493125915527
10.293654441833496 10.245674133300781
10.22774600982666 10.197254180908203
10.163341522216797 10.150217056274414
10.100420951843262 10.104557991027832
10.038954734802246

In [None]:
# accuracyの計算
# loaderの定義