In [1]:
import numpy as np
import torch
from torch.nn import LSTM

In [2]:
torch.__version__

'2.4.1+cu121'

In [3]:
def to_parameter(array):
    return torch.nn.Parameter(torch.Tensor(array))

In [4]:
lstm = LSTM(2, 4, 1)

In [5]:
def round_up_params(param):
    x = param.data.numpy().tolist()

    result = []
    for n in x:
        tmp = [int(i*100) / 100 for i in n]
        result.append(tmp)
    
    return result

In [7]:
round_up_params(lstm.weight_hh_l0)

[[0.08, 0.43, 0.07, -0.09],
 [-0.09, 0.15, -0.47, -0.15],
 [-0.3, 0.15, -0.49, 0.4],
 [0.38, 0.33, 0.0, -0.41],
 [-0.2, -0.01, -0.33, -0.04],
 [0.2, 0.16, 0.0, 0.24],
 [0.48, 0.2, -0.25, -0.25],
 [-0.16, -0.37, -0.28, 0.31],
 [0.15, 0.11, -0.02, 0.24],
 [0.17, -0.37, -0.45, 0.47],
 [-0.42, 0.09, -0.26, 0.21],
 [-0.48, 0.0, -0.01, 0.28],
 [0.25, -0.03, 0.45, 0.22],
 [0.16, -0.28, -0.33, -0.11],
 [0.05, -0.43, 0.21, 0.19],
 [0.4, 0.33, -0.24, -0.15]]

In [9]:
# Set weights and bias
x = np.array([0.5, 0.1])
h0 = np.array([-0.5, 0.18, 0.11, -0.06])
c0 = np.array([-0.47,  0.32, -0.76,  0.47])

lstm.weight_ih_l0 = to_parameter([[-0.36, 0.03],
                                 [-0.17, -0.08],
                                 [0.43, 0.19],
                                 [-0.22, 0.33],
                                 [-0.46, 0.0],
                                 [-0.09, -0.25],
                                 [0.09, 0.3],
                                 [-0.43, -0.07],
                                 [-0.03, 0.37],
                                 [-0.32, -0.35],
                                 [-0.22, -0.46],
                                 [-0.02, 0.19],
                                 [-0.4, 0.22],
                                 [0.08, -0.44],
                                 [-0.35, -0.13],
                                 [-0.15, -0.47]])
lstm.weight_hh_l0 = to_parameter([[-0.47, 0.16, -0.18, -0.28],
                                 [0.45, -0.29, -0.43, -0.01],
                                 [0.04, -0.44, 0.02, -0.02],
                                 [-0.35, 0.15, -0.41, -0.07],
                                 [0.1, -0.19, -0.24, 0.45],
                                 [0.08, -0.13, -0.05, 0.38],
                                 [-0.22, 0.36, 0.43, 0.11],
                                 [0.31, 0.01, -0.41, -0.11],
                                 [-0.22, -0.38, -0.44, -0.48],
                                 [0.09, -0.36, -0.35, 0.05],
                                 [0.05, -0.26, 0.24, -0.17],
                                 [-0.05, 0.16, 0.3, 0.09],
                                 [0.2, 0.45, 0.07, -0.08],
                                 [0.12, 0.41, 0.45, -0.24],
                                 [-0.39, 0.12, 0.0, 0.25],
                                 [-0.02, 0.47, 0.05, -0.43]])

lstm.bias_hh_l0 = to_parameter([-0.44,  0.15, -0.12,  0.35, -0.13, -0.18, -0.44, -0.47,
                                 0.01,  0.17,  0.18, -0.23, -0.32, -0.19,  0.05,  0.14])
lstm.bias_ih_l0 = to_parameter([ 0.47,  0.2,  0.23, -0.24,  0.02, -0.28, -0.32,  0.01,
                                -0.42,  0.29, 0.46,  0.12,  0.45, -0.41,  0.30,  0.11])

In [10]:
lstm.weight_ih_l0, lstm.bias_ih_l0

(Parameter containing:
 tensor([[-0.3600,  0.0300],
         [-0.1700, -0.0800],
         [ 0.4300,  0.1900],
         [-0.2200,  0.3300],
         [-0.4600,  0.0000],
         [-0.0900, -0.2500],
         [ 0.0900,  0.3000],
         [-0.4300, -0.0700],
         [-0.0300,  0.3700],
         [-0.3200, -0.3500],
         [-0.2200, -0.4600],
         [-0.0200,  0.1900],
         [-0.4000,  0.2200],
         [ 0.0800, -0.4400],
         [-0.3500, -0.1300],
         [-0.1500, -0.4700]], requires_grad=True),
 Parameter containing:
 tensor([ 0.4700,  0.2000,  0.2300, -0.2400,  0.0200, -0.2800, -0.3200,  0.0100,
         -0.4200,  0.2900,  0.4600,  0.1200,  0.4500, -0.4100,  0.3000,  0.1100],
        requires_grad=True))

In [13]:
def to_la_1d(array):
    array = [f'{i:.3f}' for i in array]
    mas = r"&".join(array)
    
    return r"\left\lbrack\begin{matrix}"+mas+r"\\\end{matrix}\right\rbrack"

def to_la_nd(array):
    j=[]
    for i in array:
        j.append(list(map(str, i)))

    result = r""
    for i in j:
        result+= r"&".join(i) + r"\\"

    return r"\left\lbrack\begin{bmatrix}"+result+r"\end{bmatrix}\rbrack\right"

    

print(to_la_1d( [-0.13, -0.18, -0.44, -0.47]))
print(to_la_nd([[ 0.1,-0.19 ,-0.24,0.45]
,[ 0.08, -0.13 ,-0.05,0.38]
,[-0.22,0.36,0.43,0.11]
,[ 0.31,0.01 ,-0.41, -0.11]]))

\left\lbrack\begin{matrix}-0.130&-0.180&-0.440&-0.470\\\end{matrix}\right\rbrack
\left\lbrack\begin{bmatrix}0.1&-0.19&-0.24&0.45\\0.08&-0.13&-0.05&0.38\\-0.22&0.36&0.43&0.11\\0.31&0.01&-0.41&-0.11\\\end{bmatrix}\rbrack\right


In [14]:
print('Input:', x.shape, x)
print('h0:', h0.shape, h0)
print('c0', c0.shape, c0)

print('LSTM')
output, (hn, cn) = lstm(torch.Tensor(x).reshape(1, -1), 
                        (torch.Tensor(h0).reshape(1, -1), torch.Tensor(c0).reshape(1, -1)))
print('Out:', output.shape, output)
print('hn:', hn.shape, hn)
print('cn', cn.shape, cn)

Input: (2,) [0.5 0.1]
h0: (4,) [-0.5   0.18  0.11 -0.06]
c0 (4,) [-0.47  0.32 -0.76  0.47]
LSTM
Out: torch.Size([1, 4]) tensor([[-0.1701,  0.0613, -0.0330,  0.0715]], grad_fn=<SqueezeBackward1>)
hn: torch.Size([1, 4]) tensor([[-0.1701,  0.0613, -0.0330,  0.0715]], grad_fn=<SqueezeBackward1>)
cn torch.Size([1, 4]) tensor([[-0.3651,  0.1665, -0.0560,  0.1277]], grad_fn=<SqueezeBackward1>)


In [15]:
def sigmoid(x):
    return 1/(1 + np.exp(-1 * x))

In [17]:
ih = lstm.state_dict()['weight_ih_l0'].numpy()
hh = lstm.state_dict()['weight_hh_l0'].numpy()

W = np.array(np.split(ih, 4))
W1 = np.array(np.split(hh, 4))

bias_ih = lstm.state_dict()['bias_ih_l0'].numpy()
bias_hh = lstm.state_dict()['bias_hh_l0'].numpy()

b = np.array(np.split(bias_ih, 4))
b1 = np.array(np.split(bias_hh, 4))

print('Bias ih:', bias_ih.shape, bias_ih, end='\n\n')
print('Bias:', b.shape, b)

Bias ih: (16,) [ 0.47  0.2   0.23 -0.24  0.02 -0.28 -0.32  0.01 -0.42  0.29  0.46  0.12
  0.45 -0.41  0.3   0.11]

Bias: (4, 4) [[ 0.47  0.2   0.23 -0.24]
 [ 0.02 -0.28 -0.32  0.01]
 [-0.42  0.29  0.46  0.12]
 [ 0.45 -0.41  0.3   0.11]]


In [22]:
i = sigmoid((W[0] @ x)+b[0] + (W1[0] @ h0)+b1[0])
f = sigmoid((W[1] @ x)+b[1] + (W1[1] @ h0)+b1[1])
g = np.tanh((W[2] @ x)+b[2] + (W1[2] @ h0)+b1[2])
o = sigmoid((W[3] @ x)+b[3] + (W1[3] @ h0)+b1[3])
c = f * c0 + i * g
h = o * np.tanh(c)
print(h, output)

[-0.17009832  0.06127686 -0.0329758   0.07150263] tensor([[-0.1701,  0.0613, -0.0330,  0.0715]], grad_fn=<SqueezeBackward1>)


In [23]:
print('Params')
print('W', W)
print('W1', W1)
print()
print('b', b)
print('b1', b1)

Params
W [[[-0.36  0.03]
  [-0.17 -0.08]
  [ 0.43  0.19]
  [-0.22  0.33]]

 [[-0.46  0.  ]
  [-0.09 -0.25]
  [ 0.09  0.3 ]
  [-0.43 -0.07]]

 [[-0.03  0.37]
  [-0.32 -0.35]
  [-0.22 -0.46]
  [-0.02  0.19]]

 [[-0.4   0.22]
  [ 0.08 -0.44]
  [-0.35 -0.13]
  [-0.15 -0.47]]]
W1 [[[-0.47  0.16 -0.18 -0.28]
  [ 0.45 -0.29 -0.43 -0.01]
  [ 0.04 -0.44  0.02 -0.02]
  [-0.35  0.15 -0.41 -0.07]]

 [[ 0.1  -0.19 -0.24  0.45]
  [ 0.08 -0.13 -0.05  0.38]
  [-0.22  0.36  0.43  0.11]
  [ 0.31  0.01 -0.41 -0.11]]

 [[-0.22 -0.38 -0.44 -0.48]
  [ 0.09 -0.36 -0.35  0.05]
  [ 0.05 -0.26  0.24 -0.17]
  [-0.05  0.16  0.3   0.09]]

 [[ 0.2   0.45  0.07 -0.08]
  [ 0.12  0.41  0.45 -0.24]
  [-0.39  0.12  0.    0.25]
  [-0.02  0.47  0.05 -0.43]]]

b [[ 0.47  0.2   0.23 -0.24]
 [ 0.02 -0.28 -0.32  0.01]
 [-0.42  0.29  0.46  0.12]
 [ 0.45 -0.41  0.3   0.11]]
b1 [[-0.44  0.15 -0.12  0.35]
 [-0.13 -0.18 -0.44 -0.47]
 [ 0.01  0.17  0.18 -0.23]
 [-0.32 -0.19  0.05  0.14]]


In [24]:
for j in range(4):
    print(f'[{j}] \t  W @ x + b', (W[j] @ x)+b[j])
    print(f'[{j}] \tW1 @ h0 + b', (W1[j] @ h0)+b1[j])
    print()

[0] 	  W @ x + b [ 0.29299999  0.107       0.46400001 -0.31699999]
[0] 	W1 @ h0 + b [-0.1792     -0.17389999 -0.2158      0.51109999]

[1] 	  W @ x + b [-0.21       -0.35       -0.24499999 -0.212     ]
[1] 	W1 @ h0 + b [-0.26759999 -0.27170001 -0.22449999 -0.6617    ]

[2] 	  W @ x + b [-0.39799999  0.095       0.30400001  0.129     ]
[2] 	W1 @ h0 + b [ 0.032       0.0187      0.14480001 -0.1486    ]

[3] 	  W @ x + b [ 0.27199998 -0.414       0.11200002 -0.012     ]
[3] 	W1 @ h0 + b [-0.3265     -0.1123      0.25159999  0.2659    ]



In [25]:
print('i', i)
print('f', f)
print('g', g)
print('o', o)
print('c', c)
print('h', h)

i [0.52841933 0.48328124 0.56173341 0.54837322]
f [0.38281901 0.34939491 0.3847346  0.29448499]
g [-0.350488    0.11321255  0.42091212 -0.0195975 ]
o [0.48637837 0.37138027 0.58991161 0.56313619]
c [-0.36512957  0.16651987 -0.05595789  0.1276612 ]
h [-0.17009832  0.06127686 -0.0329758   0.07150263]
