## 1. Cha RNN(1)

In [24]:
#import library
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [26]:
# input : apple, output : pple!
# this is simple example for understanding RNN
# make character set
input_str = 'apple'
label_str = 'pple!'
char_vocab = sorted(list(set(input_str+label_str)))
vocab_size = len(char_vocab)
print(char_vocab)
print ('size of character set : {}'.format(vocab_size))

['!', 'a', 'e', 'l', 'p']
size of character set : 5


In [9]:
# define hyperparameters
input_size = vocab_size # input size = character set size
hidden_size = 5
output_size = 5
learning_rate = 0.1

In [27]:
# give a character a unique integer index
char_to_index = dict((c, i) for i, c in enumerate(char_vocab)) 
print(char_to_index)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [28]:
# make index_to_char to get results
index_to_char={}
for key, value in char_to_index.items():
    index_to_char[value] = key
print(index_to_char)

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [29]:
# map each character in the input data and label data to an integer
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]
print(x_data)
print(y_data)

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [13]:
# add batch dimension bacause nn.RNN() basically takes a 3D tensor as input.
x_data = [x_data]
y_data = [y_data]
print(x_data)
print(y_data)

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [14]:
#one_hot_vector
x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [30]:
# change input data and label data to tensor
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [31]:
print('training data size : {}'.format(X.shape))
print('label data size : {}'.format(Y.shape))

training data size : torch.Size([1, 5, 5])
label data size : torch.Size([5])


In [33]:
# define RNN model
class Net(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

In [34]:
net = Net(input_size, hidden_size, output_size)

In [35]:
outputs = net(X)
print(outputs.shape) # 3D tensor

torch.Size([1, 5, 5])


In [20]:
print(outputs.view(-1, input_size).shape)

torch.Size([5, 5])


In [21]:
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([1, 5])
torch.Size([5])


In [36]:
# define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [23]:
#learning
for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)
    loss = criterion(outputs.view(-1, input_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    result = outputs.data.numpy().argmax(axis=2)
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result)])
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

0 loss:  1.6046030521392822 prediction:  [[3 3 3 3 4]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  llllp
1 loss:  1.3746802806854248 prediction:  [[4 4 4 2 4]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppep
2 loss:  1.1756865978240967 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
3 loss:  0.9675639867782593 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
4 loss:  0.8329417109489441 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
5 loss:  0.6868112683296204 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
6 loss:  0.5739648938179016 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
7 loss:  0.4184468686580658 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
8 loss:  0.29439520835876465 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
9 loss:  0.22024472057819366 prediction:  [[4 4 3 2 0]

## 1. Cha RNN(2)

In [76]:
#import library
import torch
import torch.nn as nn
import torch.optim as optim

In [77]:
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

In [78]:
char_set = list(set(sentence))
char_dic = {c: i for i, c in enumerate(char_set)}

In [79]:
print(char_dic)

{'s': 0, 'o': 1, 'c': 2, 'h': 3, 'd': 4, '.': 5, "'": 6, 'm': 7, ' ': 8, 'i': 9, 'u': 10, ',': 11, 'y': 12, 'w': 13, 'g': 14, 'p': 15, 'k': 16, 'a': 17, 'l': 18, 'n': 19, 'b': 20, 'f': 21, 'r': 22, 't': 23, 'e': 24}


In [80]:
dic_size = len(char_dic)
print('character set size : {}'.format(dic_size))

character set size : 25


In [42]:
# define hyperparameters
hidden_size = dic_size
sequence_length = 10
learning_rate = 0.1

In [43]:
x_data = []
y_data = []

for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    print(i, x_str, '->', y_str)

    x_data.append([char_dic[c] for c in x_str])  # x str to index
    y_data.append([char_dic[c] for c in y_str])  # y str to index

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [82]:
print(x_data[0])
print(y_data[0])

[9, 21, 8, 12, 1, 10, 8, 13, 17, 19]
[21, 8, 12, 1, 10, 8, 13, 17, 19, 23]


In [83]:
x_one_hot = [np.eye(dic_size)[x] for x in x_data]
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [84]:
print('training data size : {}'.format(X.shape))
print('label size : {}'.format(Y.shape))

training data size : torch.Size([170, 10, 25])
label size : torch.Size([170, 10])


In [85]:
print(X[0])

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [49]:
print(Y[0])

tensor([21,  8, 12,  1, 10,  8, 13, 17, 19, 23])


In [50]:
class Net(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, layers):
        super(Net, self).__init__()
        self.rnn = torch.nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, hidden_dim, bias=True)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

In [51]:
net = Net(dic_size, hidden_size, 2)

In [52]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [53]:
outputs = net(X)
print(outputs.shape)

torch.Size([170, 10, 25])


In [54]:
print(outputs.view(-1, dic_size).shape)

torch.Size([1700, 25])


In [55]:
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([170, 10])
torch.Size([1700])


In [56]:
for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)
    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    results = outputs.argmax(dim=2)
    predict_str = ""
    for j, result in enumerate(results):
        if j == 0:
            predict_str += ''.join([char_set[t] for t in result])
        else:
            predict_str += char_set[result[-1]]

    print(predict_str)

daaa,y,adad,aa,d,,aa,d,,,aadaa,,ya,a,,,d,,d,,daa,dda,,daadaa,,y,ya,ya,daad,a,d,a,,ya,d,dda,,daadadd,d,,d,a,,a,daaa,,a,daa,dad,da,ddaadad,,y,,d,aa,daaad,daa,ddaa,ydd,daaad,aa,ad,dd
edppep,ebibaebebepb pebeebbpppedbapeppbyeb,ebepbep,bebeeb phpeyeybep,,ebeb,ebb,pedbapebip pbeebebebbpb,ebi,eepb,pbep,ebiebephbebebeebepebebedbaebeuheb b p,beb,ebebebebpep baeb bbe
ettttthretrthetrttrrtrhrthrttrhttttrshthrttrrtersttrerhtt ttntthttrttthtethttttrthtttrrethtttrrthtrrthttrthrhth trttthttertttreterhtrnhtrtrethrttetrertetttrhthrhtrhrehhttrhttrthtn
 uflo ky tseo eo t et  e  t t et teo t t  t et e  oee  eo toee  t et eo ttet  e  tt t ettett et toes  o t to t  t et  t ee t eet o eo toee  t ee  t eo tole  t et ttt  eotet et e l
 o lo dolttdddet ttedlddoltdtdtt tteddddtddldtletltleoleddddldoleettldeldddedltlddtdtltdtdddldtldtdetltddtdtdll tldt lt ddt dldtltddeldtlttltddeldt ddddtletltddtltdddedtddtodtlddl
   a  t eo to e  o  t to e t e o oo tt t t ot  o t e  eao t tt  o  o eo t t   o et t o tot t eo e tt

g you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
m you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
m you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
f you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
f you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
t you want to build a ship, don't drum up people together to collect wood and don't assign them task

## 2. Word RNN

In [57]:
import torch
import torch.nn as nn
import torch.optim as optim

In [58]:
sentence = "Repeat is the best medicine for memory".split()

In [59]:
vocab = list(set(sentence))
print(vocab)

['is', 'medicine', 'memory', 'for', 'best', 'the', 'Repeat']


In [60]:
word2index = {tkn: i for i, tkn in enumerate(vocab, 1)}
word2index['<unk>']=0

In [61]:
print(word2index)

{'is': 1, 'medicine': 2, 'memory': 3, 'for': 4, 'best': 5, 'the': 6, 'Repeat': 7, '<unk>': 0}


In [62]:
print(word2index['memory'])

3


In [63]:
index2word = {v: k for k, v in word2index.items()}
print(index2word)

{1: 'is', 2: 'medicine', 3: 'memory', 4: 'for', 5: 'best', 6: 'the', 7: 'Repeat', 0: '<unk>'}


In [64]:
print(index2word[2])

medicine


In [65]:
def build_data(sentence, word2index):
    encoded = [word2index[token] for token in sentence]
    input_seq, label_seq = encoded[:-1], encoded[1:]
    input_seq = torch.LongTensor(input_seq).unsqueeze(0)
    label_seq = torch.LongTensor(label_seq).unsqueeze(0)
    return input_seq, label_seq

In [66]:
X, Y = build_data(sentence, word2index)

In [67]:
print(X)
print(Y)

tensor([[7, 1, 6, 5, 2, 4]])
tensor([[1, 6, 5, 2, 4, 3]])


In [68]:
class Net(nn.Module):
    def __init__(self, vocab_size, input_size, hidden_size, batch_first=True):
        super(Net, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings=vocab_size,
                                            embedding_dim=input_size)
        self.rnn_layer = nn.RNN(input_size, hidden_size,
                                batch_first=batch_first)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        output = self.embedding_layer(x)
        output, hidden = self.rnn_layer(output)
        output = self.linear(output)
        return output.view(-1, output.size(2))

In [69]:
vocab_size = len(word2index)
input_size = 5
hidden_size = 20

In [70]:
model = Net(vocab_size, input_size, hidden_size, batch_first=True)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters())

In [71]:
output = model(X)
print(output)

tensor([[ 0.0700, -0.3024,  0.0092, -0.0520,  0.2978, -0.2156,  0.0899,  0.0271],
        [-0.0531, -0.1779,  0.0213, -0.3063,  0.0517,  0.0114, -0.2487,  0.3695],
        [ 0.0617, -0.2082,  0.0437, -0.0776,  0.3013, -0.0668, -0.0947,  0.1383],
        [-0.0770, -0.1891,  0.1045, -0.1660,  0.1797,  0.1060, -0.2883,  0.2868],
        [-0.0204,  0.3934, -0.1830, -0.4981, -0.7915,  0.3647, -0.1341,  0.1486],
        [-0.0738, -0.0184, -0.0733, -0.1485, -0.0581, -0.0052, -0.0504,  0.3315]],
       grad_fn=<ViewBackward0>)


In [72]:
print(output.shape)

torch.Size([6, 8])


In [73]:
decode = lambda y: [index2word.get(x) for x in y]

In [74]:
for step in range(201):
    optimizer.zero_grad()
    output = model(X)
    loss = loss_function(output, Y.view(-1))
    loss.backward()
    optimizer.step()
    if step % 40 == 0:
        print("[{:02d}/201] {:.4f} ".format(step+1, loss))
        pred = output.softmax(-1).argmax(-1).tolist()
        print(" ".join(["Repeat"] + decode(pred)))
        print()

[01/201] 2.3212 
Repeat for Repeat for Repeat is Repeat

[41/201] 1.7671 
Repeat is medicine best medicine best memory

[81/201] 1.0808 
Repeat is the best medicine for memory

[121/201] 0.5512 
Repeat is the best medicine for memory

[161/201] 0.2987 
Repeat is the best medicine for memory

[201/201] 0.1801 
Repeat is the best medicine for memory

