## Import Required Libraries

In [1]:
import torch
import torch.optim as optim
import numpy as np

## Random Seed 고정

In [2]:
# Random seed to make results deterministic and reproducible
torch.manual_seed(0)

<torch._C.Generator at 0x1cd7a3d9a70>

## 'HiHello' Example 

In [3]:
# declare dictionary
# 문자들을 Index로 표현해보자
char_set = ['h','i','e','l','o']
"""
'h' - 0
'i' - 1
'e' - 2
'l' - 3
'o' - 4
"""

"\n'h' - 0\n'i' - 1\n'e' - 2\n'l' - 3\n'o' - 4\n"

In [4]:
# hyper parameters
input_size = len(char_set)
hidden_size = len(char_set)
learning_rate = 0.1

### Data Setting
- x_data를 이용해서 다음 올 문자인 y_data를 예측

In [5]:
x_data = [[0, 1, 0, 2, 3, 3]]  # h i h e l l
y_data = [[1, 0, 2, 3, 3, 4]]  # i h e l l o

### One-hot encoding을 이용해서 x_data 바꾸기

In [6]:
x_one_hot = [[[1, 0, 0, 0, 0],
              [0, 1, 0, 0, 0],
              [1, 0, 0, 0, 0],
              [0, 0, 1, 0, 0],
              [0, 0, 0, 1, 0],
              [0, 0, 0, 1, 0]]]

### Torch의 Tensor 변수로 바꾸기

In [7]:
# transform as torch tensor variable
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

## RNN 정의

### RNN의 Parameter
- input_size : Input data의 갯수
- hidden_size : Output data의 갯수
- batch_first : Batch dimenstion이 가장 앞으로 오도록 한다.

In [8]:
rnn = torch.nn.RNN(input_size,hidden_size, batch_first = True)

In [9]:
# loss function
criterion = torch.nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(rnn.parameters(), lr = learning_rate)

## Training

rnn(X)의 ouput
> output, _status(다음 Block이 있을 시 넘겨줄 hidden state에 대한 output)

output data의 Shape
> (batch,sequence,feature)
> - Batch - batch Size
> - Sequence - 만들 문장의 글자 수
> - Feature - 문장을 이루고 있는 글자의 갯수

In [10]:
# start training
for i in range(100):
    # 결과 예측
    outputs, _status = rnn(X)
    # Loss 계산
    loss = criterion(outputs.view(-1, input_size), Y.view(-1))
    
    optimizer.zero_grad()  # 기울기 초기화
    loss.backward()        # Backpropagation
    optimizer.step()       # Weight Update
    
    result = outputs.data.numpy().argmax(axis = 2)  # result.shape = (1,6) - (batch,sequence,feature)
    # 원래 result의 shape은 (1,6,5)지만 argmax를 통해서 2번째 차원에 대해 최대값을 구했기 때문에 (1,6)이 된다.
    result_str = ''.join([char_set[c] for c in np.squeeze(result)])      # result_str = 'ilello'
    # squeeze는 차원이 1인 부분을 없애주는 함수
    # python의 join함수를 통해 result에 관한 str을 만든다.
    print(i+1, ": loss: ", loss.item(), "prediction: ", result)
    print("    true Y: ", y_data, "prediction str: ", result_str)

1 : loss:  1.7802648544311523 prediction:  [[1 1 1 1 1 1]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  iiiiii
2 : loss:  1.4931954145431519 prediction:  [[1 4 1 1 4 4]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ioiioo
3 : loss:  1.3337129354476929 prediction:  [[1 3 2 3 1 4]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilelio
4 : loss:  1.2152955532073975 prediction:  [[2 3 2 3 3 3]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elelll
5 : loss:  1.1131411790847778 prediction:  [[2 3 2 3 3 3]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elelll
6 : loss:  1.024188756942749 prediction:  [[2 3 2 3 3 4]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elello
7 : loss:  0.9573155045509338 prediction:  [[2 3 2 3 3 4]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elello
8 : loss:  0.9102011322975159 prediction:  [[2 0 2 3 3 4]]
    true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ehello
9 : loss:  0.8731772303581238 prediction:  [[1 0 2 3 3 4]]
    tr

## Charseq Example (Generalization version)

In [11]:
sample = " if you want you"

In [12]:
# make dictionary
char_set = list(set(sample))
char_dic = {c: i for i, c in enumerate(char_set)}
print(char_dic)

{'i': 0, 'n': 1, 'u': 2, 'a': 3, 'f': 4, ' ': 5, 't': 6, 'y': 7, 'w': 8, 'o': 9}


In [13]:
# hyper parameters
dic_size = len(char_dic)
hidden_size = len(char_dic)
learning_rate = 0.1

### Data Setting
- np.eye : 단위행렬을 만들어주는 함수

In [14]:
# data setting
sample_idx = [char_dic[c] for c in sample]
x_data = [sample_idx[:-1]]  # 맨 마지막 문자 제외
x_one_hot = [np.eye(dic_size)[x] for x in x_data]
y_data = [sample_idx[1:]]    # 맨 첫번째 문자 제외

### Torch의 Tensor 변수로 바꾸기

In [15]:
# transform as torch tensor variable
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

## RNN 정의

In [16]:
# declare RNN
rnn = torch.nn.RNN(dic_size, hidden_size, batch_first=True)

In [17]:
# loss & optimizer setting
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn.parameters(), learning_rate)

## Training

In [18]:
# start training
for i in range(50):
    
    outputs, _status = rnn(X)
    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))  # .view를 통해 batch dimension이 앞에 오도록 한다.
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    result = outputs.data.numpy().argmax(axis=2)
    result_str = ''.join([char_set[c] for c in np.squeeze(result)])
    print(i+1, ": loss: ", loss.item(), "prediction: ", result)
    print("    true Y: ", y_data, "prediction str: ", result_str)

1 : loss:  2.4410319328308105 prediction:  [[1 8 1 3 8 9 3 1 9 3 3 8 1 8 3]]
    true Y:  [[0, 4, 5, 7, 9, 2, 5, 8, 3, 1, 6, 5, 7, 9, 2]] prediction str:  nwnawoanoaawnwa
2 : loss:  2.076482057571411 prediction:  [[7 3 9 2 9 2 9 7 9 9 7 9 7 9 2]]
    true Y:  [[0, 4, 5, 7, 9, 2, 5, 8, 3, 1, 6, 5, 7, 9, 2]] prediction str:  yaououoyooyoyou
3 : loss:  1.808059811592102 prediction:  [[7 4 5 7 9 2 2 7 5 1 7 9 7 9 2]]
    true Y:  [[0, 4, 5, 7, 9, 2, 5, 8, 3, 1, 6, 5, 7, 9, 2]] prediction str:  yf youuy nyoyou
4 : loss:  1.6273118257522583 prediction:  [[7 5 5 7 9 2 5 7 5 1 7 5 7 3 2]]
    true Y:  [[0, 4, 5, 7, 9, 2, 5, 8, 3, 1, 6, 5, 7, 9, 2]] prediction str:  y  you y ny yau
5 : loss:  1.5096145868301392 prediction:  [[7 5 5 7 9 2 5 7 5 1 6 5 7 9 2]]
    true Y:  [[0, 4, 5, 7, 9, 2, 5, 8, 3, 1, 6, 5, 7, 9, 2]] prediction str:  y  you y nt you
6 : loss:  1.4093328714370728 prediction:  [[7 5 5 7 9 2 5 7 5 1 6 5 7 9 2]]
    true Y:  [[0, 4, 5, 7, 9, 2, 5, 8, 3, 1, 6, 5, 7, 9, 2]] predictio