In [14]:
import math
from typing import Optional, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [90]:
class NaiveCustomLSTM(nn.Module):
    """Single-layer LSTM written out gate by gate, followed by a linear classifier.

    The recurrence is unrolled over the time axis of the input and only the
    hidden state of the final step is passed to the classifier, so the module
    maps a batch of sequences to one logit vector per sequence.

    Args:
        input_sz: Size of each per-step input vector.
        hidden_sz: Number of hidden units (size of ``h_t`` and ``c_t``).
        output_sz: Number of logits produced by the classifier head. Defaults
            to ``input_sz``, i.e. predicting the next symbol over the same
            vocabulary that the one-hot inputs are drawn from.
    """

    def __init__(self, input_sz: int, hidden_sz: int, output_sz: Optional[int] = None):
        super().__init__()
        if output_sz is None:
            output_sz = input_sz

        self.input_size = input_sz
        self.hidden_size = hidden_sz
        self.output_size = output_sz

        # Classifier head. Its dimensions come from the constructor arguments
        # rather than from notebook-level globals, so the class can be defined
        # and instantiated before (or without) those globals existing.
        self.W = nn.Linear(hidden_sz, output_sz, bias=False)
        self.b = nn.Parameter(torch.empty(output_sz))

        # input gate
        self.U_i = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_i = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_i = nn.Parameter(torch.empty(hidden_sz))

        # forget gate
        self.U_f = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_f = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_f = nn.Parameter(torch.empty(hidden_sz))

        # candidate cell state
        self.U_c = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_c = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_c = nn.Parameter(torch.empty(hidden_sz))

        # output gate
        self.U_o = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_o = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_o = nn.Parameter(torch.empty(hidden_sz))

        # The tensors above are uninitialised memory until this call.
        self.init_weights()

    def init_weights(self):
        """Fill every parameter (gates and classifier) from U(-k, k), k = 1/sqrt(hidden_size)."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for w in self.parameters():
            nn.init.uniform_(w, -stdv, stdv)

    def forward(
        self,
        x: torch.Tensor,
        init_states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    ) -> torch.Tensor:
        """Run the LSTM over ``x`` and classify the last hidden state.

        Args:
            x: Input of shape (batch_size, sequence_size, input_size).
            init_states: Optional ``(h_0, c_0)`` pair, each of shape
                (batch_size, hidden_size). Zeros are used when omitted.

        Returns:
            Logits of shape (batch_size, output_size).

        Raises:
            RuntimeError: If ``x`` has no time steps.
        """
        batch_size, sequence_size, _ = x.size()
        if sequence_size == 0:
            raise RuntimeError("NaiveCustomLSTM.forward needs at least one time step")

        if init_states is None:
            # new_zeros inherits dtype and device from x, so the module also
            # works after .double() / .to(device) without extra plumbing.
            h_t = x.new_zeros(batch_size, self.hidden_size)
            c_t = x.new_zeros(batch_size, self.hidden_size)
        else:
            h_t, c_t = init_states

        for t in range(sequence_size):
            # Feed the sequence one step at a time; x_t: (batch_size, input_size)
            x_t = x[:, t, :]

            # i, f, o, g, c, h: (batch_size, hidden_size)
            i_t = torch.sigmoid(x_t @ self.U_i + h_t @ self.V_i + self.b_i)
            f_t = torch.sigmoid(x_t @ self.U_f + h_t @ self.V_f + self.b_f)
            o_t = torch.sigmoid(x_t @ self.U_o + h_t @ self.V_o + self.b_o)
            g_t = torch.tanh(x_t @ self.U_c + h_t @ self.V_c + self.b_c)
            c_t = f_t * c_t + i_t * g_t
            h_t = o_t * torch.tanh(c_t)

        # Only the final hidden state feeds the classifier, so the per-step
        # states do not need to be collected.
        return self.W(h_t) + self.b
        

In [73]:
# --- Hyper-parameters -------------------------------------------------------
n_step = 3      # time steps per example; not read by any cell visible here
n_hidden = 128  # hidden units in the LSTM cell

# --- Vocabulary: the 26 lowercase letters -----------------------------------
char_arr = list('abcdefghijklmnopqrstuvwxyz')
word_dict = {ch: idx for idx, ch in enumerate(char_arr)}  # letter -> class index
number_dict = dict(enumerate(char_arr))                   # class index -> letter
n_class = len(word_dict)                                  # vocabulary size

# --- Training words: the last letter of each word is the prediction target --
seq_data = [
    'make', 'need', 'coal', 'word', 'love',
    'hate', 'live', 'home', 'hash', 'star',
]

def make_batch(sequences=None, vocab=None):
    """Turn words into one-hot input sequences and integer targets.

    For every word, all characters except the last are one-hot encoded as the
    input and the last character becomes the class index to predict.

    Args:
        sequences: Iterable of words. Defaults to the notebook-level
            ``seq_data``.
        vocab: Mapping from character to class index. Defaults to the
            notebook-level ``word_dict``. Indices are assumed to cover
            ``0 .. len(vocab) - 1``, as they do for ``word_dict``.

    Returns:
        ``(input_batch, target_batch)``: a list with one float array of shape
        ``(len(word) - 1, len(vocab))`` per word, and a list with one int
        class index per word.

    Raises:
        KeyError: If a word contains a character that is not in ``vocab``.
    """
    # Resolve the defaults at call time so the function still picks up the
    # notebook globals when called as plain make_batch().
    if sequences is None:
        sequences = seq_data
    if vocab is None:
        vocab = word_dict

    # Build the identity matrix once; row k is the one-hot vector for class k.
    one_hot = np.eye(len(vocab))

    input_batch, target_batch = [], []
    for seq in sequences:
        prefix_ids = [vocab[ch] for ch in seq[:-1]]  # e.g. 'm', 'a', 'k'
        # Indexing with a list copies the rows, so each example owns its data.
        input_batch.append(one_hot[prefix_ids])
        target_batch.append(vocab[seq[-1]])          # e.g. 'e'

    return input_batch, target_batch

In [75]:
# Build the training tensors. The per-word one-hot arrays are merged into a
# single ndarray first: handing torch a Python list of ndarrays goes through a
# slow element-by-element path that recent PyTorch releases warn about.
input_batch, target_batch = make_batch()
input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32)  # (num_words, steps, n_class)
target_batch = torch.tensor(target_batch, dtype=torch.long)             # (num_words,)

In [91]:
# Seed the global RNG right before construction so the uniform weight
# initialisation, and therefore the training curve, is the same on every
# Restart & Run All. Loss values recorded from earlier unseeded runs may
# differ slightly from what this produces.
torch.manual_seed(0)
model = NaiveCustomLSTM(n_class, n_hidden)

In [92]:
# Adam over every trainable tensor of the model (LSTM gates + classifier head).
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Cross-entropy between the model's raw logits and the integer class targets.
criterion = nn.CrossEntropyLoss()

In [93]:
# Full-batch training: each epoch is one forward/backward pass over all words.
for epoch in range(1000):
    # Forward pass and loss on the whole batch.
    output = model(input_batch)
    loss = criterion(output, target_batch)

    # Clear stale gradients, back-propagate, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Every 100th epoch, report the loss measured before this epoch's update.
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

Epoch: 0100 cost = 0.486980
Epoch: 0200 cost = 0.030088
Epoch: 0300 cost = 0.008809
Epoch: 0400 cost = 0.004180
Epoch: 0500 cost = 0.002471
Epoch: 0600 cost = 0.001644
Epoch: 0700 cost = 0.001179
Epoch: 0800 cost = 0.000890
Epoch: 0900 cost = 0.000698
Epoch: 1000 cost = 0.000562


In [94]:
# Prefixes shown next to the predictions: everything but the last character,
# the same slice make_batch feeds to the model.
inputs = [sen[:-1] for sen in seq_data]

# Inference only, so skip building the autograd graph.
with torch.no_grad():
    predict = model(input_batch).argmax(dim=1, keepdim=True)  # (num_words, 1)

# squeeze(1) drops only the kept class axis; a bare squeeze() would also
# collapse a single-word batch to a 0-d tensor, which cannot be iterated.
print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze(1)])

['mak', 'nee', 'coa', 'wor', 'lov', 'hat', 'liv', 'hom', 'has', 'sta'] -> ['e', 'd', 'l', 'd', 'e', 'e', 'e', 'e', 'h', 'r']


In [69]:
output.shape  # logits from the last training step: one row per word, one column per class

torch.Size([10, 26])