In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
from tqdm import tqdm
from sklearn.utils import shuffle

In [2]:
# Toy embedding table: 26 ids, each mapped to a 3-dimensional vector.
num_embeddings = 26
embedding_dim = 3
emb = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)

In [3]:
# Embed a 5-token sequence and insert a singleton batch axis so the result is
# laid out as (seq_len, batch, features), the layout the LSTM cells below consume.
token_ids = torch.tensor([1, 2, 3, 4, 5])
x = emb(token_ids).unsqueeze(1)
x.shape

torch.Size([5, 1, 3])

In [4]:
# Reference implementation: one unidirectional nn.LSTM layer whose input width
# equals the embedding width.
input_size, hidden_size = embedding_dim, 6
lstm = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=1,
    bidirectional=False,
)

In [5]:
# Each nn.LSTM parameter stacks the four gates along dim 0 in blocks of
# `hidden_size` rows, taken here in the order i (input), f (forget),
# g (cell candidate), o (output). Splitting by `hidden_size` yields the same
# four views as slicing by hand.

# Input-to-hidden weights, one (hidden_size, input_size) block per gate.
W_ii, W_if, W_ig, W_io = lstm.weight_ih_l0.split(hidden_size, dim=0)

# Hidden-to-hidden weights, one (hidden_size, hidden_size) block per gate.
W_hi, W_hf, W_hg, W_ho = lstm.weight_hh_l0.split(hidden_size, dim=0)

# Input-side biases, one (hidden_size,) vector per gate.
b_ii, b_if, b_ig, b_io = lstm.bias_ih_l0.split(hidden_size, dim=0)

# Hidden-side biases, one (hidden_size,) vector per gate.
b_hi, b_hf, b_hg, b_ho = lstm.bias_hh_l0.split(hidden_size, dim=0)

In [6]:
# Manual LSTM recurrence using the weights sliced out of `lstm`, with the hidden
# and cell state kept as (hidden_size, 1) column vectors. Each step prints the
# hidden state as a row; the final cell state is printed after the loop.
ht = torch.zeros(hidden_size, 1)
ct = torch.zeros(hidden_size, 1)
for i, xt in enumerate(x):
    # Turn this step's (1, input_size) row into a column for the mat-vec products.
    xt_col = xt.transpose(0, 1)
    it = torch.sigmoid(
        W_ii.mm(xt_col) + b_ii.unsqueeze(1) + W_hi.mm(ht) + b_hi.unsqueeze(1)
    )
    ft = torch.sigmoid(
        W_if.mm(xt_col) + b_if.unsqueeze(1) + W_hf.mm(ht) + b_hf.unsqueeze(1)
    )
    gt = torch.tanh(
        W_ig.mm(xt_col) + b_ig.unsqueeze(1) + W_hg.mm(ht) + b_hg.unsqueeze(1)
    )
    ot = torch.sigmoid(
        W_io.mm(xt_col) + b_io.unsqueeze(1) + W_ho.mm(ht) + b_ho.unsqueeze(1)
    )
    # Forget part of the old cell state, add the gated candidate, then expose
    # the squashed cell state through the output gate.
    ct = ft * ct + it * gt
    ht = ot * torch.tanh(ct)
    print(ht.transpose(0, 1))
print()
print(ct.transpose(0, 1))

tensor([[ 3.5591e-01, -1.6668e-01,  5.0164e-03,  7.6824e-02, -4.9371e-02,
          2.8483e-04]], grad_fn=<TransposeBackward0>)
tensor([[ 0.3421, -0.2757,  0.0861,  0.0047, -0.0971, -0.0536]],
       grad_fn=<TransposeBackward0>)
tensor([[ 0.2958, -0.1578,  0.1216,  0.0326, -0.0636,  0.0717]],
       grad_fn=<TransposeBackward0>)
tensor([[ 0.3395, -0.2268,  0.1474,  0.0125, -0.0498,  0.0195]],
       grad_fn=<TransposeBackward0>)
tensor([[ 0.2856, -0.1752,  0.1543, -0.0018, -0.0790,  0.0782]],
       grad_fn=<TransposeBackward0>)

tensor([[ 0.6083, -0.5396,  0.2967, -0.0040, -0.1860,  0.1845]],
       grad_fn=<TransposeBackward0>)


In [7]:
# Reference run: the built-in layer returns (output, (h_n, c_n)); the per-step
# rows and final states should agree with the values printed by the manual loop.
lstm(x)

(tensor([[[ 3.5591e-01, -1.6668e-01,  5.0164e-03,  7.6824e-02, -4.9371e-02,
            2.8483e-04]],
 
         [[ 3.4206e-01, -2.7575e-01,  8.6137e-02,  4.6832e-03, -9.7088e-02,
           -5.3595e-02]],
 
         [[ 2.9580e-01, -1.5784e-01,  1.2163e-01,  3.2591e-02, -6.3551e-02,
            7.1739e-02]],
 
         [[ 3.3953e-01, -2.2675e-01,  1.4739e-01,  1.2475e-02, -4.9814e-02,
            1.9518e-02]],
 
         [[ 2.8555e-01, -1.7519e-01,  1.5431e-01, -1.8015e-03, -7.8964e-02,
            7.8248e-02]]], grad_fn=<StackBackward>),
 (tensor([[[ 0.2856, -0.1752,  0.1543, -0.0018, -0.0790,  0.0782]]],
         grad_fn=<StackBackward>),
  tensor([[[ 0.6083, -0.5396,  0.2967, -0.0040, -0.1860,  0.1845]]],
         grad_fn=<StackBackward>)))

In [8]:
# Show what in-place Xavier (Glorot) normal initialisation does to a
# bias-shaped (6, 1) parameter.
# Start from explicit zeros: torch.Tensor(6, 1) only allocates memory, so the
# "before" print would otherwise show whatever bytes happened to be there.
a = nn.Parameter(torch.zeros(6, 1))
print(a)
nn.init.xavier_normal_(a)  # in-place; `a` itself now holds the random values
print(a)

Parameter containing:
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], requires_grad=True)
Parameter containing:
tensor([[ 0.0646],
        [ 0.0126],
        [-0.4932],
        [ 0.0813],
        [ 0.4289],
        [ 0.7953]], requires_grad=True)


In [9]:
class MyLSTM(nn.Module):
    """Single-layer, unidirectional LSTM written out gate by gate.

    Every gate (input ``i``, forget ``f``, cell candidate ``g``, output ``o``)
    owns four parameters: an input-to-hidden weight ``W_i*``, a hidden-to-hidden
    weight ``W_h*`` and the two matching biases ``b_i*`` / ``b_h*``. Biases are
    stored as ``(hidden_size, 1)`` columns so they broadcast across the batch
    axis of the column-major state used in :meth:`forward`.

    Args:
        input_size: number of features in each input step.
        hidden_size: number of features in the hidden and cell state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Input-to-hidden weights, one (hidden_size, input_size) matrix per gate.
        self.W_ii = nn.Parameter(torch.empty(hidden_size, input_size))
        self.W_if = nn.Parameter(torch.empty(hidden_size, input_size))
        self.W_ig = nn.Parameter(torch.empty(hidden_size, input_size))
        self.W_io = nn.Parameter(torch.empty(hidden_size, input_size))

        # Hidden-to-hidden weights, one (hidden_size, hidden_size) matrix per gate.
        self.W_hi = nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.W_hf = nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.W_hg = nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.W_ho = nn.Parameter(torch.empty(hidden_size, hidden_size))

        # Input-side biases, kept 2-D so xavier_normal_ accepts them.
        self.b_ii = nn.Parameter(torch.empty(hidden_size, 1))
        self.b_if = nn.Parameter(torch.empty(hidden_size, 1))
        self.b_ig = nn.Parameter(torch.empty(hidden_size, 1))
        self.b_io = nn.Parameter(torch.empty(hidden_size, 1))

        # Hidden-side biases.
        self.b_hi = nn.Parameter(torch.empty(hidden_size, 1))
        self.b_hf = nn.Parameter(torch.empty(hidden_size, 1))
        self.b_hg = nn.Parameter(torch.empty(hidden_size, 1))
        self.b_ho = nn.Parameter(torch.empty(hidden_size, 1))

        # The tensors above are uninitialised until this call fills them.
        self.initWeight()

    def initWeight(self):
        """Redraw every weight and bias in place with Xavier normal init.

        Parameters are visited in a fixed order (input weights, hidden weights,
        input biases, hidden biases; gates i, f, g, o within each group) so a
        seeded RNG always produces the same values.
        """
        for param in (
            self.W_ii, self.W_if, self.W_ig, self.W_io,
            self.W_hi, self.W_hf, self.W_hg, self.W_ho,
            self.b_ii, self.b_if, self.b_ig, self.b_io,
            self.b_hi, self.b_hf, self.b_hg, self.b_ho,
        ):
            nn.init.xavier_normal_(param)

    def forward(self, x, hidden=None):
        """Run the LSTM over a whole sequence.

        Args:
            x: input of shape ``(seq_len, batch, input_size)``.
            hidden: optional ``(h_0, c_0)`` pair, each of shape
                ``(1, batch, hidden_size)``. When omitted both states start
                at zero.

        Returns:
            ``(output, (h_n, c_n))`` where ``output`` has shape
            ``(seq_len, batch, hidden_size)`` and holds the hidden state after
            every step, and ``h_n`` / ``c_n`` have shape
            ``(1, batch, hidden_size)`` and hold the final hidden / cell state.
        """
        batch_size = x.shape[1]

        # State is kept as (hidden_size, batch) so the (hidden_size, 1) biases
        # broadcast over the batch columns.
        if hidden is None:
            # Allocate on the parameters' device/dtype rather than the default.
            ht = self.W_hi.new_zeros(self.hidden_size, batch_size)
            ct = self.W_hi.new_zeros(self.hidden_size, batch_size)
        else:
            h0, c0 = hidden
            ht = h0[0].transpose(0, 1)
            ct = c0[0].transpose(0, 1)

        outputs = []
        for t in range(x.shape[0]):
            # (batch, input_size) -> (input_size, batch)
            xt = x[t].transpose(0, 1)
            it = torch.sigmoid(self.W_ii.mm(xt) + self.b_ii + self.W_hi.mm(ht) + self.b_hi)
            ft = torch.sigmoid(self.W_if.mm(xt) + self.b_if + self.W_hf.mm(ht) + self.b_hf)
            gt = torch.tanh(self.W_ig.mm(xt) + self.b_ig + self.W_hg.mm(ht) + self.b_hg)
            ot = torch.sigmoid(self.W_io.mm(xt) + self.b_io + self.W_ho.mm(ht) + self.b_ho)
            ct = ft * ct + it * gt
            ht = ot * torch.tanh(ct)
            # Store batch-major so the stacked result is (seq_len, batch, hidden).
            outputs.append(ht.transpose(0, 1))

        if outputs:
            res = torch.stack(outputs)
        else:
            # Zero-length sequence: keep the 3-D output contract.
            res = self.W_hi.new_zeros(0, batch_size, self.hidden_size)
        return res, (ht.transpose(0, 1).unsqueeze(0), ct.transpose(0, 1).unsqueeze(0))
# Instantiate the hand-written layer with the same sizes as the reference `lstm`.
mlstm = MyLSTM(input_size=input_size, hidden_size=hidden_size)

In [10]:
# Redraw mlstm's parameters (already initialised once in __init__), then run it
# on the same input. Its weights are independent random draws, not copies of
# `lstm`'s, so the numbers shown here are not expected to match lstm(x).
mlstm.initWeight()
mlstm(x)

(tensor([[[ 0.1281,  0.2257,  0.3533,  0.3819,  0.1263,  0.0016]],
 
         [[-0.0973,  0.1956,  0.2991,  0.3740,  0.1990, -0.1664]],
 
         [[-0.0779,  0.3853,  0.3049, -0.0108,  0.1184, -0.1146]],
 
         [[-0.0171,  0.2958,  0.3935, -0.1407,  0.1658, -0.0881]],
 
         [[-0.1013,  0.4276,  0.3682, -0.0877,  0.1327, -0.0884]]],
        grad_fn=<CopySlices>),
 (tensor([[[-0.1013,  0.4276,  0.3682, -0.0877,  0.1327, -0.0884]]],
         grad_fn=<UnsqueezeBackward0>),
  tensor([[[-0.2218,  0.7667,  0.6361, -0.2010,  0.1967, -0.1630]]],
         grad_fn=<UnsqueezeBackward0>)))

In [11]:
# Built-in layer on the same input again, for side-by-side comparison with the
# mlstm(x) output of the previous cell.
lstm(x)

(tensor([[[ 3.5591e-01, -1.6668e-01,  5.0164e-03,  7.6824e-02, -4.9371e-02,
            2.8483e-04]],
 
         [[ 3.4206e-01, -2.7575e-01,  8.6137e-02,  4.6832e-03, -9.7088e-02,
           -5.3595e-02]],
 
         [[ 2.9580e-01, -1.5784e-01,  1.2163e-01,  3.2591e-02, -6.3551e-02,
            7.1739e-02]],
 
         [[ 3.3953e-01, -2.2675e-01,  1.4739e-01,  1.2475e-02, -4.9814e-02,
            1.9518e-02]],
 
         [[ 2.8555e-01, -1.7519e-01,  1.5431e-01, -1.8015e-03, -7.8964e-02,
            7.8248e-02]]], grad_fn=<StackBackward>),
 (tensor([[[ 0.2856, -0.1752,  0.1543, -0.0018, -0.0790,  0.0782]]],
         grad_fn=<StackBackward>),
  tensor([[[ 0.6083, -0.5396,  0.2967, -0.0040, -0.1860,  0.1845]]],
         grad_fn=<StackBackward>)))