In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [None]:
# Toy next-character data: input_str is the model input and label_str is the
# target sequence (the same word shifted left by one, ending with '!').
input_str = 'apple'
label_str = 'pple!'

# Sorted so that every character gets the same index on every run.
char_vocab = sorted(set(input_str + label_str))
vocab_size = len(char_vocab)
print(char_vocab)
# The format string needs a '{}' placeholder; without it str.format() silently
# discards vocab_size and only the caption is printed.
print('문자 집합의 크기 : {}'.format(vocab_size))

In [None]:
# Hyperparameters for the 'apple' experiment.
input_size = vocab_size   # width of each one-hot input vector
hidden_size = 5           # width of the RNN hidden state
# One output score per vocabulary character; derived from vocab_size instead of
# a hard-coded 5 so it stays correct if the strings above are changed.
output_size = vocab_size
learning_rate = 0.01

In [None]:
# Lookup table: vocabulary character -> integer index.
char_to_index = {ch: idx for idx, ch in enumerate(char_vocab)}
print(char_to_index)

In [None]:
# Inverse lookup table: integer index -> vocabulary character.
index_to_char = {idx: ch for ch, idx in char_to_index.items()}

print(index_to_char)

In [None]:
# Encode the input and label strings as lists of vocabulary indices.
x_data, y_data = (
    [char_to_index[ch] for ch in text] for text in (input_str, label_str)
)
print(x_data)
print(y_data)

In [None]:
# Index sequences as 1-D integer tensors.
x_tensor = torch.LongTensor(x_data)
y_tensor = torch.LongTensor(y_data)
print(x_tensor, y_tensor, sep='\n')

# Prepend a dimension of size 1 (same effect as unsqueeze(0)).
x_tensor = x_tensor[None]
y_tensor = y_tensor[None]
print(x_tensor, y_tensor, sep='\n')

In [None]:
# Convert the tensors back to NumPy arrays; x_data / y_data are rebound from
# Python lists to arrays here.
x_data = x_tensor.detach().cpu().numpy()
y_data = y_tensor.detach().cpu().numpy()
print(x_data, y_data, sep='\n')

In [None]:
# One-hot encode by selecting rows of the identity matrix with the indices.
identity = np.eye(vocab_size)
x_one_hot = [identity[row] for row in x_data]
print(x_one_hot)

In [None]:
# Stack the list of one-hot arrays into a single ndarray before handing it to
# PyTorch: building a tensor directly from a list of ndarrays takes a slow
# element-wise path and emits a UserWarning.
X = torch.tensor(np.array(x_one_hot), dtype=torch.float32)
# Labels stay as integer class indices, as required by CrossEntropyLoss.
Y = torch.LongTensor(y_data)

In [None]:
# Report the shapes of the training inputs and of the labels.
train_shape, label_shape = X.shape, Y.shape
print('훈련 데이터의 크기 : {}'.format(train_shape))
print('레이블의 크기 : {}'.format(label_shape))

In [None]:
class RnnNet(nn.Module):
    """Recurrent network: an ``nn.RNN`` followed by a linear layer applied at every time step.

    Args:
        input_size: Number of features per time step (the one-hot width).
        hidden_size: Width of the RNN hidden state.
        output_size: Number of scores produced per time step.
        num_layers: Number of stacked recurrent layers. Defaults to 1, which
            gives the same single-layer network as before this parameter existed.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # batch_first=True: inputs and outputs are laid out as (batch, seq_len, features).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to scores of shape (batch, seq_len, output_size)."""
        # nn.RNN also returns the final hidden state; only the per-step outputs are used.
        hidden_states, _ = self.rnn(x)
        return self.fc(hidden_states)

In [None]:
# Instantiate the model for the 'apple' experiment.
net = RnnNet(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

In [None]:
# Print every learnable parameter tensor of the network.
for param in net.parameters():
    print(param)

In [None]:
# Run one forward pass and inspect the shape of the result.
outputs = net(X)
output_shape = outputs.shape
print(output_shape)

In [None]:
# Flatten the model outputs to 2-D. Their last dimension is output_size (the
# width of the final linear layer), not input_size; the two only happen to be
# equal in this example.
print(outputs.view(-1, output_size).shape)

In [None]:
# Label shape before and after flattening to 1-D.
flat_labels = Y.view(-1)
print(Y.shape, flat_labels.shape, sep='\n')

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)
    # CrossEntropyLoss takes 2-D scores and 1-D class indices here, so both are
    # flattened. The scores' last dimension is output_size (not input_size).
    loss = criterion(outputs.view(-1, output_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    # detach() instead of the legacy .data attribute: take the values out of
    # the autograd graph before converting to NumPy.
    result = outputs.detach().numpy().argmax(axis=2)
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result)])
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

In [None]:
# Larger-scale test on a full sentence.

sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

# Sort the character set: the iteration order of a set of strings can differ
# between interpreter runs (string hashing is randomised), which would give
# every character a different index after each kernel restart.
char_set = sorted(set(sentence))
char_dic = {c: i for i, c in enumerate(char_set)}
print(char_dic)

In [None]:
# Number of distinct characters in the sentence.
dic_size = len(char_dic)
size_message = '문자 집합의 크기 : {}'.format(dic_size)
print(size_message)

In [None]:
# Hyperparameters for the sentence experiment.
sequence_length = 8  # arbitrarily chosen window length
learning_rate = 0.1
hidden_size = dic_size  # hidden state as wide as the character set

In [None]:
# Build the dataset with a sliding window: each input is sequence_length
# characters and its target is the same window shifted right by one character.
x_data = []
y_data = []

num_windows = len(sentence) - sequence_length
for i in range(num_windows):
    window_end = i + sequence_length
    x_str = sentence[i:window_end]
    y_str = sentence[i + 1:window_end + 1]
    print(i, x_str, '->', y_str)

    # Convert both windows from characters to indices.
    x_data.append([char_dic[ch] for ch in x_str])
    y_data.append([char_dic[ch] for ch in y_str])

In [None]:
# Show the first encoded input window and its target.
print(x_data[0], y_data[0], sep='\n')

In [None]:
# One-hot encode the inputs by selecting identity-matrix rows. The identity
# matrix is built once instead of once per sample.
identity = np.eye(dic_size)
x_one_hot = [identity[seq] for seq in x_data]
# Stack into a single ndarray before creating the tensor: converting a list of
# ndarrays directly takes a slow element-wise path and emits a UserWarning.
X = torch.tensor(np.array(x_one_hot), dtype=torch.float32)
# Labels stay as integer class indices.
Y = torch.LongTensor(y_data)

In [None]:
# Report the shapes of the training inputs and of the labels.
train_shape, label_shape = X.shape, Y.shape
print('훈련 데이터의 크기 : {}'.format(train_shape))
print('레이블의 크기 : {}'.format(label_shape))

In [None]:
# Show the first one-hot encoded input window.
first_sample = X[0]
print(first_sample)

In [None]:
# RnnNet's third argument is the output size, not a layer count. It must be
# dic_size (one score per character); passing 2 produced 2-wide outputs that
# cannot be reshaped with view(-1, dic_size) or scored against Y.
net = RnnNet(dic_size, hidden_size, dic_size)
# This time two recurrent layers are stacked. The call above builds a single
# recurrent layer, so replace it with a two-layer RNN of the same input and
# hidden widths. This must happen before the optimizer is created so that the
# optimizer sees the new parameters.
net.rnn = nn.RNN(dic_size, hidden_size, num_layers=2, batch_first=True)

In [None]:
# Loss function and optimizer for the sentence model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [None]:
# Forward pass over the whole dataset; the result is a 3-D tensor.
outputs = net(X)
output_shape = outputs.shape
print(output_shape)

In [None]:
# Reshape the outputs into a 2-D tensor with dic_size columns.
flat_outputs = outputs.view(-1, dic_size)
print(flat_outputs.shape)

In [None]:
# Label shape before and after flattening to 1-D.
flat_labels = Y.view(-1)
print(Y.shape, flat_labels.shape, sep='\n')

In [None]:
for i in range(100):
    optimizer.zero_grad()
    # The full dataset X, shaped (num_windows, sequence_length, dic_size), is
    # fed to the model in every epoch.
    outputs = net(X)
    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    # results holds the predicted character index for every position and has
    # shape (num_windows, sequence_length).
    results = outputs.argmax(dim=2)
    # Convert once to nested Python ints rather than indexing char_set with
    # 0-dim tensors one element at a time.
    result_rows = results.tolist()

    # Consecutive windows are shifted by one character, so take the first
    # window in full and only the last character of each later window.
    predicted_indices = []
    for j, row in enumerate(result_rows):
        predicted_indices.extend(row if j == 0 else row[-1:])
    predict_str = ''.join(char_set[t] for t in predicted_indices)

    print(predict_str)