# https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html

In [10]:
# Author: Robert Guthrie

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x17eb95c6db0>

In [19]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
print(lstm, inputs)

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
print(hidden)

LSTM(3, 3) [tensor([[1.1393, 0.0582, 0.8405]]), tensor([[-0.3500,  0.3181, -0.1518]]), tensor([[ 0.4300, -1.1828, -0.7735]]), tensor([[-0.9992, -0.5900, -1.7509]]), tensor([[-1.4686, -0.8066,  0.1316]])]
(tensor([[[-0.3633,  0.4898, -0.2826]]]), tensor([[[-1.4921,  2.7120, -2.0804]]]))


In [11]:
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument  to the lstm at a later time
# Add the extra 2nd dimension
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.0187,  0.1713, -0.2944]],

        [[-0.3521,  0.1026, -0.2971]],

        [[-0.3191,  0.0781, -0.1957]],

        [[-0.1634,  0.0941, -0.1637]],

        [[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>)
(tensor([[[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>), tensor([[[-0.9825,  0.4715, -0.0633]]], grad_fn=<StackBackward>))


In [12]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}

# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [13]:
class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [17]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()
with torch.no_grad():
    inputs = prepare_sequence(training_data[1][0], word_to_ix)
    print(inputs)
    tag_scores = model(inputs)
    print(tag_scores)

tensor([5, 6, 7, 8])
tensor([[-1.3328, -1.1364, -0.8788],
        [-1.3647, -1.0470, -0.9325],
        [-1.4329, -0.9545, -0.9771],
        [-1.5074, -0.9057, -0.9828]])


In [15]:
training_data

[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']),
 (['Everybody', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]

In [20]:
for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)

        # Step 3. Run our forward pass.
        print(sentence_in)
        tag_scores = model(sentence_in)

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

# See what the scores are after training
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)

    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)

tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0,

tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0, 1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([0,

# https://discuss.pytorch.org/t/example-of-many-to-one-lstm/1728/2

In [22]:
import torch
import torch.nn as nn
from torch.autograd import Variable

time_steps = 10
batch_size = 3
in_size = 5
classes_no = 7

model = nn.LSTM(in_size, classes_no, 2)
input_seq = Variable(torch.randn(time_steps, batch_size, in_size))
output_seq, _ = model(input_seq)
last_output = output_seq[-1] # very last one

loss = nn.CrossEntropyLoss()
target = Variable(torch.LongTensor(batch_size).random_(0, classes_no-1))
err = loss(last_output, target)
err.backward()

In [23]:
model

LSTM(5, 7, num_layers=2)

In [27]:
input_seq.shape, output_seq.shape, last_output.shape

(torch.Size([10, 3, 5]), torch.Size([10, 3, 7]), torch.Size([3, 7]))

In [26]:
input_seq, last_output

(tensor([[[ 0.0746,  1.3236, -1.0770, -0.1188, -0.0200],
          [-0.5350,  1.1625,  0.5232,  1.4601,  0.4712],
          [ 0.7468,  1.2244,  0.9337, -0.1174, -0.4615]],
 
         [[-1.0121,  0.7401,  0.2174, -0.3218,  1.5069],
          [-0.1239, -0.6696, -0.7555, -0.7609, -0.1557],
          [-1.0281, -2.2458, -0.8262, -0.4284, -0.3218]],
 
         [[-1.7309,  1.3368,  0.1605, -0.7912, -1.2292],
          [ 1.4830,  1.1118, -1.3131,  0.6120,  0.4511],
          [ 0.7085, -0.2305,  0.1662,  2.0108,  1.0082]],
 
         [[ 1.2507,  0.2609, -1.2670,  1.1160,  1.3044],
          [ 0.6712, -0.7129, -1.1354,  1.0170, -1.3190],
          [-0.6627,  1.1002,  0.9063,  1.0554, -0.6508]],
 
         [[ 2.4010,  0.0216, -1.1332, -1.4655, -0.0741],
          [-0.4102, -1.4597, -0.1239,  0.6322, -0.7460],
          [ 1.4955,  1.5451,  0.6999,  1.4015, -0.2289]],
 
         [[-0.6014, -0.4049,  0.6636,  0.3970, -1.4269],
          [ 0.6887,  0.4419, -0.9640,  1.2935,  0.4637],
          [ 0.44

In [42]:
rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))

In [38]:
output.shape

torch.Size([5, 3, 20])

In [40]:
rnn?

# https://wingnim.tistory.com/37

## 단어 한글자씩 넣어주는 과정에서 hihello라는 문장을 예측하는 모델. 예제

In [3]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
num_classes = 5 
input_size = 5 # one-hot size 
hidden_size = 5 #output from the cell. 바로 결과 예측을 위해서 5로 설정 
batch_size = 1 # sentence의 개수 (단어개수) 
sequence_length = 1 # 이번에는 한번에 하나씩 해본다 
num_layers = 1 # one layer rnn이다. (아직 안 다룬 개념)

In [4]:
idx2char = ['h','i','e','l','o'] 
x_data = [0,1,0,2,3,3] #hihell 
y_data = [1,0,2,3,3,4] #ihello 

In [5]:
one_hot_lookup = [[1,0,0,0,0], #h 
                  [0,1,0,0,0], #e 
                  [0,0,1,0,0], #l
                  [0,0,0,1,0], #l
                  [0,0,0,0,1]] #o
x_one_hot = [one_hot_lookup[x] for x in x_data] 
inputs = torch.Tensor(x_one_hot) 
labels = torch.LongTensor(y_data)

In [6]:
print(inputs) # 'hihell' -> one hot (6 x 5)
print(labels)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]])
tensor([1, 0, 2, 3, 3, 4])


### batch 없이 한번에 하나의 time slot (sequence) 만 feed 하는 구조

In [7]:
class Model(nn.Module): 
    def __init__(self): 
        super(Model,self).__init__() 
        self.rnn=nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True) 
        
    def forward(self, x, hidden): 
        #input x 를 (batch_size,sequence_length,input_size)로 reshape함 
        #just for make sure이라고 하심. 
        x=x.view(batch_size, sequence_length, input_size) 
        
        #Propagate input through RNN 
        #Input:(batch,seq_len,input_size) 
        out, hidden = self.rnn(x, hidden) 
        
        #for make sure, output이 N * 5 shape을 따르게 하기 위해서 
        out = out.view(-1, num_classes) 
        return hidden, out 
    
    def init_hidden(self): 
        #initialize hidden and cell states 
        return torch.zeros(num_layers, batch_size, hidden_size).to(device)

In [10]:
model = Model().to(device) 
criterion = nn.CrossEntropyLoss() #1 
optimizer = optim.Adam(model.parameters(),lr = 0.1) 
for epoch in range(30): 
    optimizer.zero_grad() 
    loss = 0 
    hidden = model.init_hidden() #2 
    
    print("predicted string : ",end="") 
    inputs = inputs.to(device) 
    labels = labels.to(device) 
    for input,label in zip(inputs,labels): #3 
        #print("input : {}, label : {}".format(input, label))
        labels = labels.view(-1,1) 
        hidden,output = model(input,hidden) 
        #print("hidden : {}, output : {}".format(hidden, output))
        val,idx = output.max(1) 
        #print("val : {}, idx : {}".format(val, idx))
        print(idx2char[idx.data[0]],end="") 
        loss += criterion(output,label) ### WHY += ?? ###
        
    print(", epoch: %d, loss: %1.3f" % (epoch+1,loss.data)) 
    loss.backward() 
    optimizer.step()


predicted string : input : tensor([1., 0., 0., 0., 0.]), label : tensor([1])
hidden : tensor([[[ 0.0464,  0.6295, -0.5829, -0.4245, -0.7376]]],
       grad_fn=<StackBackward>), output : tensor([[ 0.0464,  0.6295, -0.5829, -0.4245, -0.7376]], grad_fn=<ViewBackward>)
val : tensor([0.6295], grad_fn=<MaxBackward0>), idx : tensor([1])
iinput : tensor([0., 1., 0., 0., 0.]), label : tensor([0])
hidden : tensor([[[ 0.1843,  0.2523, -0.4583, -0.3180, -0.6311]]],
       grad_fn=<StackBackward>), output : tensor([[ 0.1843,  0.2523, -0.4583, -0.3180, -0.6311]], grad_fn=<ViewBackward>)
val : tensor([0.2523], grad_fn=<MaxBackward0>), idx : tensor([1])
iinput : tensor([1., 0., 0., 0., 0.]), label : tensor([2])
hidden : tensor([[[ 0.1459,  0.5832, -0.4185, -0.6585, -0.7285]]],
       grad_fn=<StackBackward>), output : tensor([[ 0.1459,  0.5832, -0.4185, -0.6585, -0.7285]], grad_fn=<ViewBackward>)
val : tensor([0.5832], grad_fn=<MaxBackward0>), idx : tensor([1])
iinput : tensor([0., 0., 1., 0., 0.]), l

       grad_fn=<StackBackward>), output : tensor([[-0.9113, -0.3961, -0.3323,  0.8125,  0.3154]], grad_fn=<ViewBackward>)
val : tensor([0.8125], grad_fn=<MaxBackward0>), idx : tensor([3])
linput : tensor([0., 0., 0., 1., 0.]), label : tensor([4])
hidden : tensor([[[-0.6761, -0.1244, -0.6802,  0.9334,  0.4889]]],
       grad_fn=<StackBackward>), output : tensor([[-0.6761, -0.1244, -0.6802,  0.9334,  0.4889]], grad_fn=<ViewBackward>)
val : tensor([0.9334], grad_fn=<MaxBackward0>), idx : tensor([3])
l, epoch: 6, loss: 5.778
predicted string : input : tensor([1., 0., 0., 0., 0.]), label : tensor([1])
hidden : tensor([[[-0.7980,  0.2759, -0.4758, -0.6292, -0.6075]]],
       grad_fn=<StackBackward>), output : tensor([[-0.7980,  0.2759, -0.4758, -0.6292, -0.6075]], grad_fn=<ViewBackward>)
val : tensor([0.2759], grad_fn=<MaxBackward0>), idx : tensor([1])
iinput : tensor([0., 1., 0., 0., 0.]), label : tensor([0])
hidden : tensor([[[ 0.8744, -0.9612, -0.9710,  0.8839, -0.6453]]],
       grad_fn=

val : tensor([0.5681], grad_fn=<MaxBackward0>), idx : tensor([1])
iinput : tensor([0., 1., 0., 0., 0.]), label : tensor([0])
hidden : tensor([[[ 0.9689, -0.9890, -0.9953,  0.9138, -0.8858]]],
       grad_fn=<StackBackward>), output : tensor([[ 0.9689, -0.9890, -0.9953,  0.9138, -0.8858]], grad_fn=<ViewBackward>)
val : tensor([0.9689], grad_fn=<MaxBackward0>), idx : tensor([0])
hinput : tensor([1., 0., 0., 0., 0.]), label : tensor([2])
hidden : tensor([[[-0.9946, -0.6048,  0.9343, -0.9301, -0.9777]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9946, -0.6048,  0.9343, -0.9301, -0.9777]], grad_fn=<ViewBackward>)
val : tensor([0.9343], grad_fn=<MaxBackward0>), idx : tensor([2])
einput : tensor([0., 0., 1., 0., 0.]), label : tensor([3])
hidden : tensor([[[-0.7321, -0.8090, -0.9661,  0.9346, -0.6229]]],
       grad_fn=<StackBackward>), output : tensor([[-0.7321, -0.8090, -0.9661,  0.9346, -0.6229]], grad_fn=<ViewBackward>)
val : tensor([0.9346], grad_fn=<MaxBackward0>), idx : tens

       grad_fn=<StackBackward>), output : tensor([[-0.9791,  0.8087, -0.5909, -0.8994, -0.9319]], grad_fn=<ViewBackward>)
val : tensor([0.8087], grad_fn=<MaxBackward0>), idx : tensor([1])
iinput : tensor([0., 1., 0., 0., 0.]), label : tensor([0])
hidden : tensor([[[ 0.9900, -0.9953, -0.9994,  0.8931, -0.9870]]],
       grad_fn=<StackBackward>), output : tensor([[ 0.9900, -0.9953, -0.9994,  0.8931, -0.9870]], grad_fn=<ViewBackward>)
val : tensor([0.9900], grad_fn=<MaxBackward0>), idx : tensor([0])
hinput : tensor([1., 0., 0., 0., 0.]), label : tensor([2])
hidden : tensor([[[-0.9980, -0.8787,  0.9617, -0.9608, -0.9969]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9980, -0.8787,  0.9617, -0.9608, -0.9969]], grad_fn=<ViewBackward>)
val : tensor([0.9617], grad_fn=<MaxBackward0>), idx : tensor([2])
einput : tensor([0., 0., 1., 0., 0.]), label : tensor([3])
hidden : tensor([[[-0.9844, -0.9022, -0.9842,  0.9621, -0.9564]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9844

predicted string : input : tensor([1., 0., 0., 0., 0.]), label : tensor([1])
hidden : tensor([[[-0.9916,  0.9496, -0.7620, -0.9636, -0.9735]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9916,  0.9496, -0.7620, -0.9636, -0.9735]], grad_fn=<ViewBackward>)
val : tensor([0.9496], grad_fn=<MaxBackward0>), idx : tensor([1])
iinput : tensor([0., 1., 0., 0., 0.]), label : tensor([0])
hidden : tensor([[[ 0.9970, -0.9968, -0.9999,  0.0601, -0.9987]]],
       grad_fn=<StackBackward>), output : tensor([[ 0.9970, -0.9968, -0.9999,  0.0601, -0.9987]], grad_fn=<ViewBackward>)
val : tensor([0.9970], grad_fn=<MaxBackward0>), idx : tensor([0])
hinput : tensor([1., 0., 0., 0., 0.]), label : tensor([2])
hidden : tensor([[[-0.9974, -0.8396,  0.9462, -0.9708, -0.9999]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9974, -0.8396,  0.9462, -0.9708, -0.9999]], grad_fn=<ViewBackward>)
val : tensor([0.9462], grad_fn=<MaxBackward0>), idx : tensor([2])
einput : tensor([0., 0., 1., 0., 0.]), l

val : tensor([0.9981], grad_fn=<MaxBackward0>), idx : tensor([0])
hinput : tensor([1., 0., 0., 0., 0.]), label : tensor([2])
hidden : tensor([[[-0.9909, -0.7405,  0.9327, -0.9808, -1.0000]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9909, -0.7405,  0.9327, -0.9808, -1.0000]], grad_fn=<ViewBackward>)
val : tensor([0.9327], grad_fn=<MaxBackward0>), idx : tensor([2])
einput : tensor([0., 0., 1., 0., 0.]), label : tensor([3])
hidden : tensor([[[-0.9989, -0.9718, -0.9981,  0.9145, -0.9980]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9989, -0.9718, -0.9981,  0.9145, -0.9980]], grad_fn=<ViewBackward>)
val : tensor([0.9145], grad_fn=<MaxBackward0>), idx : tensor([3])
linput : tensor([0., 0., 0., 1., 0.]), label : tensor([3])
hidden : tensor([[[-0.9947, -0.9999, -0.9949,  0.9511, -0.0160]]],
       grad_fn=<StackBackward>), output : tensor([[-0.9947, -0.9999, -0.9949,  0.9511, -0.0160]], grad_fn=<ViewBackward>)
val : tensor([0.9511], grad_fn=<MaxBackward0>), idx : tens

### 그렇지만 for loop을 쓰지 않고 한번에 모든 input을 넣어서 output을 결과로 받아본다면 더욱 빠르게 연산이 가능할 것 같습니다

In [21]:
# num_classes = 5 
# input_size = 5 # one-hot size 
# hidden_size = 5 #output from the cell. 바로 결과 예측을 위해서 5로 설정 
# batch_size = 1 # sentence의 개수 (단어개수) 
# sequence_length = 1 # 이번에는 한번에 하나씩 해본다 
# num_layers = 1 # one layer rnn이다. (아직 안 다룬 개념)
sequence_length = 6

In [22]:
class Model(nn.Module): 
    def __init__(self): 
        super(Model,self).__init__() 
        self.rnn=nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True) 
        
    def forward(self, x, hidden): 
        #input x 를 (batch_size,sequence_length,input_size)로 reshape함 
        #just for make sure이라고 하심. 
        x=x.view(batch_size, sequence_length, input_size) 
        
        #Propagate input through RNN 
        #Input:(batch,seq_len,input_size) 
        out, hidden = self.rnn(x, hidden) 
        
        #for make sure, output이 N * 5 shape을 따르게 하기 위해서 
        out = out.view(-1, num_classes) 
        return out ### (변경점) hidden, out -> out 만 리턴하게
    
    def init_hidden(self): 
        #initialize hidden and cell states 
        return torch.zeros(num_layers, batch_size, hidden_size).to(device)

In [23]:
#그렇게 조금 바꾸고 나서, outputs = model(inputs,hidden)으로 몽땅 넣어주기만 하면 됩니다. 그럼 6개를 몽땅 output으로 뱉어내게 되죠.
#그럼 그걸 또 몽땅 CELoss로 넘겨 계산해주면 되는데요, 한번 코드로 자세히 보시기 바랍니다.

In [25]:
model = Model().to(device) 

criterion = nn.CrossEntropyLoss() 
optimizer = optim.Adam(model.parameters(),lr = 0.1) 

inputs = inputs.to(device) 
labels = labels.to(device)

for epoch in range(100): 
    optimizer.zero_grad() 
    loss = 0 
    hidden = model.init_hidden() 
    
    # print("predicted string : ",end="")

    print("inputs : {}, labels : {}".format(inputs, labels))
    outputs = model(inputs,hidden) 
    print("outputs : {}".format(outputs))
    loss = criterion(outputs,labels) 
    _, idx = outputs.max(1) 
    print("idx : {}".format(idx))
    result_str = [idx2char[c] for c in idx.squeeze()] 
    print("epoch: %d, loss: %1.3f" % (epoch+1,loss.data)) 
    print("Predicted string: ", ''.join(result_str)) 
    loss.backward() 
    optimizer.step()

inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([1, 0, 2, 3, 3, 4])
outputs : tensor([[ 0.1301, -0.4021, -0.0641,  0.6102, -0.0675],
        [ 0.2269, -0.7225,  0.3317,  0.6907,  0.5363],
        [ 0.5562, -0.6393, -0.3397,  0.7767,  0.4464],
        [ 0.8496, -0.0923, -0.0811,  0.4057,  0.3928],
        [ 0.2142, -0.4290,  0.2139,  0.6145,  0.3519],
        [ 0.2990, -0.5634,  0.0778,  0.5493,  0.3883]], grad_fn=<ViewBackward>)
idx : tensor([3, 3, 3, 0, 3, 3])
epoch: 1, loss: 1.715
Predicted string:  lllhll
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([1, 0, 2, 3, 3, 4])
outputs : tensor([[ 0.2268, -0.1255,  0.2315,  0.6692,  0.2282],
        [ 0.5462, -0.5681,  0.5098,

        [-0.9995, -0.9835, -1.0000,  0.9985,  0.9258]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 3])
epoch: 33, loss: 0.584
Predicted string:  ihelll
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([1, 0, 2, 3, 3, 4])
outputs : tensor([[-0.9310,  0.8634, -0.4471, -0.4591, -0.9954],
        [ 0.9879, -0.6834, -0.9763, -0.9895, -0.9995],
        [-0.9824, -0.7922,  0.9724, -0.9727, -1.0000],
        [-0.8630, -0.9619, -0.9976,  0.9868, -0.9957],
        [-0.9985, -0.9993, -0.9999,  0.9979, -0.1737],
        [-0.9995, -0.9834, -1.0000,  0.9986,  0.9449]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 3])
epoch: 34, loss: 0.577
Predicted string:  ihelll
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
     

        [-0.9998, -0.9982, -1.0000,  0.9970,  0.9635]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 3])
epoch: 66, loss: 0.530
Predicted string:  ihelll
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([1, 0, 2, 3, 3, 4])
outputs : tensor([[-0.9663,  0.9790, -0.9571, -0.9438, -0.9974],
        [ 0.9969, -0.9917, -0.9971, -0.9999, -0.9999],
        [-0.9966, -0.9692,  0.9952, -0.9972, -1.0000],
        [-0.9402, -0.9952, -0.9998,  0.9975, -0.9994],
        [-0.9994, -1.0000, -1.0000,  0.9966, -0.3008],
        [-0.9998, -0.9982, -1.0000,  0.9970,  0.9660]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 3])
epoch: 67, loss: 0.530
Predicted string:  ihelll
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
     

epoch: 94, loss: 0.525
Predicted string:  ihelll
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([1, 0, 2, 3, 3, 4])
outputs : tensor([[-0.9846,  0.9890, -0.9734, -0.9714, -0.9972],
        [ 0.9976, -0.9935, -0.9981, -0.9999, -1.0000],
        [-0.9985, -0.9852,  0.9977, -0.9985, -1.0000],
        [-0.9787, -0.9946, -0.9998,  0.9982, -0.9998],
        [-0.9996, -1.0000, -1.0000,  0.9959, -0.3590],
        [-0.9999, -0.9988, -1.0000,  0.9960,  0.9721]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 3])
epoch: 95, loss: 0.525
Predicted string:  ihelll
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([1, 0, 2, 3, 3, 4])
outputs : tensor([[-0.9849,  0.9891, -0.9737, -0.9

In [60]:
?nn.CrossEntropyLoss

### 모델 커스터마이징

In [13]:
num_classes = 5 
input_size = 5 # one-hot size 
hidden_size = 5 #output from the cell. 바로 결과 예측을 위해서 5로 설정 
batch_size = 1 # sentence의 개수 (단어개수) 
num_layers = 1 # one layer rnn이다. (아직 안 다룬 개념)
sequence_length = 6

In [14]:
# labels = torch.Tensor(y_data)
y_one_hot = [one_hot_lookup[x] for x in y_data] 
labels = torch.Tensor(y_one_hot)
labels

tensor([[0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [15]:
class Model(nn.Module): 
    def __init__(self): 
        super(Model,self).__init__() 
        self.rnn=nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True) 
        #self.hidden2tag = nn.Linear(hidden_size, tagset_size)
        
    def forward(self, x, hidden): 
        #input x 를 (batch_size,sequence_length,input_size)로 reshape함 
        #just for make sure이라고 하심. 
        x=x.view(batch_size, sequence_length, input_size) 
        
        #Propagate input through RNN 
        #Input:(batch,seq_len,input_size) 
        out, _ = self.rnn(x, hidden)
        #out = self.hidden2tag(out)
        #for make sure, output이 N * 5 shape을 따르게 하기 위해서 
        out = out.view(-1, num_classes) 
        return out
    
    def init_hidden(self): 
        #initialize hidden and cell states 
        return torch.zeros(num_layers, batch_size, hidden_size).to(device)

In [18]:
model = Model().to(device) 

criterion = nn.MSELoss() 
optimizer = optim.Adam(model.parameters(),lr = 0.1) 

inputs = inputs.to(device) 
labels = labels.to(device)

for epoch in range(100): 
    optimizer.zero_grad() 
    loss = 0 
    hidden = model.init_hidden() 
    
    # print("predicted string : ",end="")

    print("inputs : {}, labels : {}".format(inputs, labels))
    outputs = model(inputs,hidden) 
    print("outputs : {}".format(outputs))
    loss = criterion(outputs,labels) 
    _, idx = outputs.max(1) 
    print("idx : {}".format(idx))
    result_str = [idx2char[c] for c in idx.squeeze()] 
    #print("outputs_sq : {}".format(outputs_sq))
    print("epoch: %d, loss: %1.3f" % (epoch+1,loss.data)) 
    print("Predicted string: ", ''.join(result_str)) 
    loss.backward() 
    optimizer.step()

inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([[0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
outputs : tensor([[ 0.2470, -0.5169,  0.5739,  0.3350,  0.3811],
        [ 0.5984,  0.0836,  0.2650, -0.3262,  0.1799],
        [ 0.2399, -0.5019,  0.5154,  0.1753,  0.4337],
        [ 0.4316,  0.0474,  0.3973,  0.0674,  0.3606],
        [ 0.6092,  0.1818,  0.2357,  0.3739,  0.3201],
        [ 0.7022,  0.1119,  0.0580,  0.3089,  0.3031]], grad_fn=<ViewBackward>)
idx : tensor([2, 0, 2, 0, 0, 0])
epoch: 1, loss: 0.249
Predicted string:  ehehhh
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0

        [-0.1613,  0.0691,  0.0403, -0.0633,  0.8751]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 4])
epoch: 25, loss: 0.026
Predicted string:  ihello
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([[0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
outputs : tensor([[ 0.1022,  0.8178,  0.0849,  0.1011, -0.0816],
        [ 0.9623,  0.0988,  0.2177, -0.0337, -0.2149],
        [-0.0052, -0.0656,  0.7864,  0.1163, -0.1218],
        [ 0.0288, -0.0504,  0.2631,  0.9900, -0.2058],
        [ 0.0973, -0.1983, -0.1324,  0.7592,  0.2661],
        [-0.1301,  0.0152,  0.0012,  0.0960,  0.8224]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 4])
epoch: 26, loss: 0.021
Predicted string:  ihello
inputs : t

       grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 4])
epoch: 52, loss: 0.011
Predicted string:  ihello
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([[0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
outputs : tensor([[-0.0140,  0.8888, -0.0151,  0.0261, -0.0229],
        [ 0.9501, -0.0168,  0.0654,  0.0757, -0.0390],
        [ 0.0455,  0.0306,  0.8664,  0.0360, -0.0062],
        [-0.0360,  0.0196,  0.2144,  0.9963, -0.1000],
        [ 0.0580, -0.0277, -0.2921,  0.8191,  0.2046],
        [-0.0296, -0.0864, -0.0279, -0.0638,  0.7726]], grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 4])
epoch: 53, loss: 0.011
Predicted string:  ihello
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0.,

        [0., 0., 0., 0., 1.]])
outputs : tensor([[-2.6162e-03,  9.1318e-01,  1.3916e-02,  1.0345e-02, -6.1255e-03],
        [ 9.8427e-01,  1.5236e-02,  3.3893e-02,  2.7474e-02, -1.6450e-02],
        [-2.8124e-03, -1.0813e-02,  9.1055e-01,  2.2911e-02, -6.9947e-03],
        [-2.1838e-02,  5.1911e-02,  1.6762e-01,  9.9771e-01, -1.3177e-01],
        [-3.1766e-04, -4.9880e-02, -2.1474e-01,  8.1942e-01,  1.5754e-01],
        [ 1.4888e-02,  4.2165e-02,  3.7494e-03,  2.3230e-02,  7.9096e-01]],
       grad_fn=<ViewBackward>)
idx : tensor([1, 0, 2, 3, 3, 4])
epoch: 79, loss: 0.007
Predicted string:  ihello
inputs : tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]]), labels : tensor([[0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
outputs : tens