In [2]:
# Simple Character LSTM
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [3]:
# Preprocessing string data
# Vocabulary layout: letters a-z (indices 0-25), space (26), punctuation,
# and finally "0" and "1". string_to_onehot sets the last two slots for its
# start and end rows, so those two characters act as sequence markers.

chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
string = "hello pytorch. how long can a rnn cell remember? show me your limit!"

char_list = list(chars)   # one entry per vocabulary character, in index order
char_len = len(char_list)

char_len

35

In [4]:
# String to onehot vector
# a -> [1 0 0 ... 0 0]



def string_to_onehot(string):
    """Encode ``string`` as a matrix of one-hot rows framed by start/end rows.

    Row 0 is the start marker (second-to-last vocabulary slot set), the last
    row is the end marker (last vocabulary slot set), and each row in between
    is the one-hot encoding of the corresponding character of ``string``.

    Args:
        string: text to encode; every character must appear in ``char_list``.

    Returns:
        Integer ``numpy.ndarray`` of shape ``(len(string) + 2, char_len)``.

    Raises:
        ValueError: if a character of ``string`` is not in ``char_list``.
    """
    # Column to set in each row: start marker, the characters, end marker.
    columns = [-2]
    for ch in string:
        try:
            columns.append(char_list.index(ch))
        except ValueError:
            raise ValueError(f"character {ch!r} is not in the vocabulary") from None
    columns.append(-1)

    # Allocate the whole matrix once instead of re-stacking it per character.
    output = np.zeros(shape=(len(columns), char_len), dtype=int)
    output[np.arange(len(columns)), columns] = 1
    return output

In [5]:
# Onehot vector to word
# [1 0 0 ... 0 0] -> a

def onehot_to_word(onehot_1):
    """Return the vocabulary character at the largest entry of ``onehot_1``.

    Args:
        onehot_1: ``torch.Tensor`` of one-hot values or scores. The position
            of the maximum is taken over the flattened tensor, so leading
            singleton dimensions (e.g. ``(1, 1, char_len)``) are fine.

    Returns:
        The matching single-character string from ``char_list``.
    """
    # argmax() on the tensor itself avoids the NumPy round trip, which is
    # rejected for tensors that require grad or do not live on the CPU.
    return char_list[onehot_1.argmax().item()]

In [6]:
# Hyperparameter settings
# The string is fed to the network one character at a time, so batch_size is
# fixed at 1. The case with batch_size > 1 is covered in the next practice code.

# Fix the RNG so weight initialisation (and therefore the loss curve) is the
# same on every Restart & Run All.
seed = 0
torch.manual_seed(seed)

batch_size = 1  # there is only one sentence; several sentences would allow a larger batch

# Training also works with other seq_len values; 1 keeps the test cell simple.
seq_len = 1  # each input is processed on its own; 2 would feed two inputs at once

# num_layers can be changed as long as the state tensors are shaped to match.
num_layers = 3
input_size = char_len  # 35 characters
# The LSTM output is compared directly against the one-hot label in the
# training loop, so its width has to equal the vocabulary size.
hidden_size = 35
lr = 0.01
num_epochs = 1000

one_hot = torch.from_numpy(string_to_onehot(string)).float()

print(one_hot.size())  # the sentence has 68 characters; the start and end rows make it 70

torch.Size([70, 35])


In [7]:
# Stacked LSTM wrapper (depth is set by num_layers, 3 in this notebook)

class RNN(nn.Module):
    """Thin wrapper around ``nn.LSTM`` that passes hidden and cell state explicitly.

    Args:
        input_size: number of features in each input time step.
        hidden_size: number of features in the hidden/cell state, which is
            also the width of the LSTM output.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch ships a ready-made LSTM module, so no gates are written by hand.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)

    def forward(self, input_, hidden, cell):
        """Run the LSTM over ``input_`` starting from ``(hidden, cell)``.

        Args:
            input_: tensor of shape ``(seq_len, batch, input_size)``.
            hidden: hidden state, ``(num_layers, batch, hidden_size)``.
            cell: cell state, ``(num_layers, batch, hidden_size)``.

        Returns:
            ``(output, hidden, cell)`` where ``output`` holds the top-layer
            hidden state for every time step and ``hidden``/``cell`` are the
            updated states to feed into the next call.
        """
        output, (hidden, cell) = self.lstm(input_, (hidden, cell))
        return output, hidden, cell

    def init_hidden_cell(self, batch=None):
        """Return zero-filled ``(hidden, cell)`` states sized for this model.

        Args:
            batch: batch dimension of the states. When omitted, the
                notebook-level ``batch_size`` setting is used.

        Returns:
            Two tensors of shape ``(num_layers, batch, hidden_size)``.
        """
        if batch is None:
            batch = batch_size  # notebook-level default
        # Size the states from this instance's own configuration (not the
        # notebook globals) and match the parameters' dtype and device.
        reference = next(self.parameters())
        shape = (self.num_layers, batch, self.hidden_size)
        hidden = reference.new_zeros(shape)
        cell = reference.new_zeros(shape)
        return hidden, cell

rnn = RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)  # build the network

In [8]:
# Loss function & Optimizer
# Mean squared error between the LSTM output and the one-hot target row;
# Adam updates every parameter of the network with learning rate lr.
loss_func = nn.MSELoss()
optimizer = optim.Adam(params=rnn.parameters(), lr=lr)

In [9]:
# Smoke test: push the first time step through the untrained network and
# check the tensor shapes before starting the training loop.
j = 0
input_data = one_hot[j:j+seq_len].view(seq_len, batch_size, input_size)  # seq_len was set to 1 earlier

hidden, cell = rnn.init_hidden_cell()
output, hidden, cell = rnn(input_data, hidden, cell)

# One output row per time step; one state slab per LSTM layer.
assert output.shape == (seq_len, batch_size, hidden_size)
assert hidden.shape == (num_layers, batch_size, hidden_size)
assert cell.shape == (num_layers, batch_size, hidden_size)

In [10]:
# Number of (input, target) pairs: every position except the last one has a
# following character to predict.
unroll_len = one_hot.size()[0] // seq_len - 1
for i in range(num_epochs):
    # Each epoch replays the sentence from a zero state.
    hidden, cell = rnn.init_hidden_cell()

    # backward() runs once per epoch (after the whole unroll), so the
    # gradients only need clearing once per epoch as well.
    optimizer.zero_grad()

    loss = 0
    for j in range(unroll_len):  # 69 steps for this sentence
        # Input is the character at position j ...
        input_data = one_hot[j:j+seq_len].view(seq_len, batch_size, input_size)
        # ... and the target is the next character, one position further on.
        label = one_hot[j+1:j+seq_len+1].view(seq_len, batch_size, input_size)

        output, hidden, cell = rnn(input_data, hidden, cell)
        loss += loss_func(output.view(1, -1), label.view(1, -1))

    # Backpropagate through the full unrolled sequence, then update.
    loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print(loss)

tensor(2.4432, grad_fn=<AddBackward0>)
tensor(1.8139, grad_fn=<AddBackward0>)
tensor(1.7405, grad_fn=<AddBackward0>)
tensor(1.5782, grad_fn=<AddBackward0>)
tensor(1.3532, grad_fn=<AddBackward0>)
tensor(1.0168, grad_fn=<AddBackward0>)
tensor(0.7045, grad_fn=<AddBackward0>)
tensor(0.4263, grad_fn=<AddBackward0>)
tensor(0.2377, grad_fn=<AddBackward0>)
tensor(0.1467, grad_fn=<AddBackward0>)
tensor(0.0851, grad_fn=<AddBackward0>)
tensor(0.0533, grad_fn=<AddBackward0>)
tensor(0.0376, grad_fn=<AddBackward0>)
tensor(0.0289, grad_fn=<AddBackward0>)
tensor(0.0236, grad_fn=<AddBackward0>)
tensor(0.0196, grad_fn=<AddBackward0>)
tensor(0.0170, grad_fn=<AddBackward0>)
tensor(0.0153, grad_fn=<AddBackward0>)
tensor(0.0170, grad_fn=<AddBackward0>)
tensor(0.0145, grad_fn=<AddBackward0>)
tensor(0.0129, grad_fn=<AddBackward0>)
tensor(0.0122, grad_fn=<AddBackward0>)
tensor(0.0115, grad_fn=<AddBackward0>)
tensor(0.0107, grad_fn=<AddBackward0>)
tensor(0.0098, grad_fn=<AddBackward0>)
tensor(0.0092, grad_fn=<A

In [11]:
hidden, cell = rnn.init_hidden_cell()

# Inference only, so no autograd graph is needed.
with torch.no_grad():
    # Each step is fed the true character at position j and decodes the
    # network's guess for position j+1. The loop stops one step short of
    # unroll_len so the step whose target is the end row is not decoded.
    for j in range(unroll_len - 1):
        # Inputs are one-hot rows, so they are shaped by input_size.
        input_data = one_hot[j:j+1].view(1, batch_size, input_size)

        output, hidden, cell = rnn(input_data, hidden, cell)
        print(onehot_to_word(output), end="")

# In the output below, the same input character (a space) is followed by
# different predictions at different positions, which shows that the LSTM is
# using what it remembers of the earlier characters.

hello pytorch. how long can a rnn cell remember? show me your limit!