<a href="https://colab.research.google.com/github/JangHanjun/Natural_Language_Programming/blob/main/Chapter%2011/CharRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Toy next-character task: the label is the input shifted left by one
# character, with '!' appended as the final target.
input_str = 'apple'
label_str = 'pple!'

# Vocabulary = every distinct character appearing in either string, sorted so
# that the character order (and therefore the index assignment) is deterministic.
char_vocab = sorted(set(input_str) | set(label_str))
vocab_size = len(char_vocab)

print(vocab_size)

5


In [3]:
# Model and optimizer hyperparameters.
input_size = vocab_size   # width of each one-hot input vector (one slot per character)
hidden_size = 5           # width of the RNN hidden state (free hyperparameter)
output_size = vocab_size  # one logit per vocabulary character; derived, not hard-coded
learning_rate = 0.1       # step size handed to the optimizer

In [11]:
# Map each character to its position in the sorted vocabulary.
char_to_index = {ch: idx for idx, ch in enumerate(char_vocab)}
print(char_to_index)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [12]:
# Inverse lookup (index -> character), built by swapping each key/value pair
# of char_to_index. A comprehension avoids leaving loop variables behind in
# the kernel namespace.
index_to_char = {index: char for char, index in char_to_index.items()}
print(index_to_char)

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [13]:
# Encode both strings as lists of vocabulary indices.
x_data = [char_to_index[ch] for ch in input_str]
y_data = [char_to_index[ch] for ch in label_str]

print(x_data, y_data, sep='\n')

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [14]:
# Add a leading batch dimension (batch of one) by wrapping each sequence in a list.
# NOTE: this rebinds the same names, so re-running the cell nests the lists
# one level deeper each time.
x_data, y_data = [x_data], [y_data]

print(x_data, y_data, sep='\n')

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [15]:
# Rows of the identity matrix are the one-hot vectors: indexing it with a list
# of character indices selects one row per character of the sequence.
identity = np.eye(vocab_size)
x_one_hot = [identity[seq] for seq in x_data]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [29]:
# Stack the per-sequence one-hot arrays into a single ndarray before handing
# them to PyTorch: building a tensor directly from a list of ndarrays is slow
# and emits a UserWarning.
X = torch.tensor(np.array(x_one_hot), dtype=torch.float32)
# Integer class indices, used as the targets of the cross-entropy loss.
Y = torch.tensor(y_data, dtype=torch.long)

print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))

훈련 데이터의 크기 : torch.Size([1, 5, 5])
레이블의 크기 : torch.Size([1, 5])


In [30]:
class Net(torch.nn.Module):
  """Single RNN layer followed by a linear layer applied to every time step."""

  def __init__(self, input_size, hidden_size, output_size):
    """Build the layers.

    Args:
      input_size: number of features in each input vector.
      hidden_size: number of features in the RNN hidden state.
      output_size: number of values the linear layer emits per time step.
    """
    super().__init__()
    # Recurrent layer; batch_first=True puts the batch dimension first.
    self.rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True)
    # Output layer: projects each hidden state to output_size values.
    self.fc = torch.nn.Linear(hidden_size, output_size, bias=True)

  def forward(self, x):
    """Run x through the RNN, then project every time step's hidden state."""
    # The RNN also returns its final hidden state, which is not needed here.
    hidden_seq, _ = self.rnn(x)
    return self.fc(hidden_seq)

In [31]:
# Seed PyTorch's RNG before building the model so the random weight
# initialization (and everything trained from it) is identical on every
# Restart & Run All.
torch.manual_seed(0)
net = Net(input_size, hidden_size, output_size)

In [32]:
# Sanity-check the forward pass on the untrained model.
outputs = net(X)
print(outputs.size())  # expected: torch.Size([1, 5, 5])

torch.Size([1, 5, 5])


In [33]:
# Flatten the logits to one row per time step. Their last dimension is the
# number of classes the model emits (output_size), not the input width.
print(outputs.view(-1, output_size).shape)

torch.Size([5, 5])


In [34]:
# Labels before and after flattening away the batch dimension.
print(Y.size())
print(Y.view(-1).size())

torch.Size([1, 5])
torch.Size([5])


In [35]:
# Cross-entropy over the per-step class logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [36]:
# Training loop: one optimizer step per iteration, logging the loss and the
# decoded prediction after every step.
for i in range(100):
  optimizer.zero_grad()
  outputs = net(X)
  # Use view to drop the batch dimension: logits become one row per time step
  # and labels become a flat vector. The logits' last dimension is
  # output_size (the number of classes), not input_size.
  loss = criterion(outputs.view(-1, output_size), Y.view(-1))

  loss.backward()  # compute gradients
  optimizer.step()

  # detach() gives a gradient-free view for NumPy conversion (preferred over
  # the legacy .data attribute); argmax over the last axis picks the
  # highest-scoring class at each time step.
  result = outputs.detach().numpy().argmax(axis=2)
  # Decode the first (only) sequence of the batch. Indexing, unlike
  # np.squeeze, still yields a 1-D array when the sequence has length 1.
  result_str = ''.join(index_to_char[c] for c in result[0])
  print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

0 loss:  1.5881786346435547 prediction:  [[2 2 3 3 2]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  eelle
1 loss:  1.3469537496566772 prediction:  [[2 2 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  eele!
2 loss:  1.0998705625534058 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
3 loss:  0.8591639399528503 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
4 loss:  0.6333431005477905 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
5 loss:  0.4380223751068115 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
6 loss:  0.29186779260635376 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
7 loss:  0.19353574514389038 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
8 loss:  0.13088056445121765 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
9 loss:  0.09111857414245605 prediction:  [[4 4 3 2 