In [1]:
import torch
from torch import nn
from torch.autograd import Variable

### Define a basic RNN network

In [2]:
# Stacked Elman RNN: 20 input features per time step, 50 hidden units, 2 layers.
basic_rnn = nn.RNN(
    input_size=20,
    hidden_size=50,
    num_layers=2,
)

### Get the first layer's $w_{ih}$ (input-to-hidden) weight

In [11]:
# Layer-0 input-to-hidden weights; the printed size is hidden_size x input_size (50x20).
basic_rnn.weight_ih_l0

Parameter containing:
-4.4217e-02 -1.1636e-02 -1.0950e-01  ...   3.0889e-02 -5.3554e-02  1.3470e-02
 2.0199e-02  2.6346e-02 -1.2236e-01  ...   3.0369e-02  3.1875e-02  7.5414e-02
-7.3592e-02 -1.0765e-01 -6.4064e-02  ...  -1.4163e-02 -2.7480e-02 -7.8861e-02
                ...                   ⋱                   ...                
 1.3970e-01  5.2258e-02 -1.5994e-02  ...   7.2413e-02 -1.1146e-01 -4.2005e-02
 1.9304e-02  1.2416e-01  4.3513e-02  ...  -6.9903e-02 -1.9987e-02  9.1069e-02
-1.2357e-01  1.3164e-01  4.9964e-02  ...  -1.2016e-01  1.1797e-01 -7.6756e-02
[torch.FloatTensor of size 50x20]

### Get the first layer's $w_{hh}$ (hidden-to-hidden) weight

In [15]:
# Layer-0 hidden-to-hidden (recurrent) weights; the printed size is 50x50.
basic_rnn.weight_hh_l0

Parameter containing:
 0.0022  0.0818  0.0834  ...  -0.1024 -0.0085 -0.0421
-0.0973  0.1049 -0.0598  ...  -0.0427  0.0391 -0.0953
-0.0930 -0.0776  0.1303  ...   0.0230 -0.1142  0.1007
          ...             ⋱             ...          
-0.0447 -0.1351 -0.0234  ...   0.0948  0.0350  0.0472
 0.0477  0.1165 -0.0886  ...  -0.0022  0.1095 -0.0051
-0.0308  0.0166  0.0746  ...  -0.0695  0.0498  0.0692
[torch.FloatTensor of size 50x50]

### Get the first layer's $b_{hh}$ (hidden-to-hidden) bias

In [16]:
# Layer-0 hidden-to-hidden bias, one entry per hidden unit (50 values);
# the input-to-hidden bias is the separate parameter `bias_ih_l0`.
basic_rnn.bias_hh_l0

Parameter containing:
-0.1202
-0.0048
-0.1250
-0.0693
-0.1308
-0.1012
-0.1178
-0.0295
-0.1199
-0.0158
-0.0685
-0.0488
-0.0250
 0.0746
-0.0528
-0.0815
 0.0856
-0.0298
-0.1252
-0.0927
-0.0828
-0.0874
-0.0054
-0.0830
-0.0754
-0.0407
 0.0857
-0.1328
 0.1402
-0.0688
-0.0966
-0.1279
-0.1050
-0.1330
-0.0941
-0.0059
-0.1115
 0.0742
-0.0698
-0.0611
 0.1103
-0.1305
 0.0884
 0.1018
-0.0922
-0.0763
-0.1081
-0.1194
 0.0293
-0.0630
[torch.FloatTensor of size 50]

### Create a toy input for the network

In [22]:
# Random toy batch in the default (seq_len, batch, input_size) layout.
toy_input = Variable(torch.randn(100, 32, 20))

# Random initial hidden state: (num_layers * num_directions, batch, hidden_size).
h_0 = Variable(torch.randn(2, 32, 50))

In [24]:
# Run the whole sequence through the RNN, starting from the hidden state h_0.
# toy_output: last layer's output at every time step; h_n: final hidden state of each layer.
toy_output, h_n = basic_rnn(toy_input, h_0)

In [25]:
toy_output.size()  # (seq_len, batch, hidden_size)

torch.Size([100, 32, 50])

In [26]:
h_n.size()  # (num_layers * num_directions, batch, hidden_size)

torch.Size([2, 32, 50])

### LSTM

In [32]:
# Two-layer LSTM with the same input and hidden sizes as the basic RNN above.
lstm = nn.LSTM(
    input_size=20,
    hidden_size=50,
    num_layers=2,
)

In [31]:
# Layer-0 input-to-hidden weights. The printed size is 200x20, i.e. 4 * hidden_size rows:
# the weights of the four LSTM gates are stacked into a single matrix.
lstm.weight_ih_l0

Parameter containing:
 0.0438  0.0636 -0.1081  ...  -0.1173  0.1311 -0.0928
-0.0769  0.0477 -0.1354  ...   0.0995  0.0037 -0.0498
 0.0960 -0.0333  0.0674  ...   0.0891 -0.0534 -0.0954
          ...             ⋱             ...          
 0.0957 -0.0410  0.1212  ...   0.0975  0.1042 -0.1136
 0.0578 -0.0604 -0.1190  ...  -0.1373  0.0272  0.1093
 0.0593 -0.1397 -0.1055  ...   0.1406 -0.0876  0.0023
[torch.FloatTensor of size 200x20]

In [33]:
# No initial state is passed, so the LSTM starts from zero hidden and cell states.
# Note: this rebinds h_n, which previously held the basic RNN's final hidden state.
lstm_out, (h_n, c_n) = lstm(toy_input)

In [34]:
lstm_out.size()  # (seq_len, batch, hidden_size)

torch.Size([100, 32, 50])

In [35]:
h_n.size()  # final hidden state: (num_layers * num_directions, batch, hidden_size)

torch.Size([2, 32, 50])

In [36]:
c_n.size()  # final cell state: (num_layers * num_directions, batch, hidden_size)

torch.Size([2, 32, 50])