In [1]:
import torch
import torch.nn as nn

In [2]:
torch.manual_seed(1)
n_in, n_out = 3, 5

inpt = torch.randn([6])

inpt = inpt.view(2, 1, 3)

# print(f'inpt: {inpt}, inpt shape: {inpt.shape}')

vanilla_rnn = nn.RNN(n_in, n_out)
out, hx = vanilla_rnn(inpt)

print(f'out: {out}, \n\nout shape: {out.shape}, \n\nhx: {hx}, \n\nhx shape: {hx.shape}')

out: tensor([[[ 0.0037,  0.1530,  0.7012, -0.1402,  0.1944]],

        [[ 0.3124, -0.1422,  0.5204, -0.2674,  0.0915]]],
       grad_fn=<StackBackward>), 

out shape: torch.Size([2, 1, 5]), 

hx: tensor([[[ 0.3124, -0.1422,  0.5204, -0.2674,  0.0915]]],
       grad_fn=<StackBackward>), 

hx shape: torch.Size([1, 1, 5])


In [3]:
for name, param in vanilla_rnn.named_parameters():
    print(name, param, param.shape)

weight_ih_l0 Parameter containing:
tensor([[ 0.1633, -0.1743, -0.0326],
        [-0.0403,  0.0648, -0.0018],
        [ 0.3909,  0.1392, -0.1665],
        [-0.2701, -0.0750, -0.1929],
        [-0.1433,  0.0214,  0.2666]], requires_grad=True) torch.Size([5, 3])
weight_hh_l0 Parameter containing:
tensor([[ 0.2431, -0.4372,  0.2772,  0.1249,  0.4242],
        [ 0.2952, -0.4075, -0.4252, -0.2157,  0.3927],
        [-0.0745,  0.1914, -0.2078,  0.4388, -0.1892],
        [ 0.3354,  0.0053, -0.2356,  0.2299, -0.2374],
        [ 0.1315, -0.1291, -0.0490, -0.4299, -0.2132]], requires_grad=True) torch.Size([5, 5])
bias_ih_l0 Parameter containing:
tensor([ 0.2427, -0.1087,  0.4454,  0.3585, -0.0209], requires_grad=True) torch.Size([5])
bias_hh_l0 Parameter containing:
tensor([-0.2985,  0.2723,  0.1388, -0.2891,  0.2905], requires_grad=True) torch.Size([5])


In [4]:
wih = vanilla_rnn.weight_ih_l0
whh = vanilla_rnn.weight_hh_l0
bih = vanilla_rnn.bias_ih_l0
bhh = vanilla_rnn.bias_hh_l0

print(f'wih shape: {wih.shape}, whh shape = {whh.shape}\
bih shape: {bih.shape}, bhh shape = {bhh.shape}')

input = inpt

wih shape: torch.Size([5, 3]), whh shape = torch.Size([5, 5])bih shape: torch.Size([5]), bhh shape = torch.Size([5])


In [5]:
tanh = nn.Tanh()
output = []
hid = torch.zeros(1, 5, dtype = torch.float32)
# print(hid)
for i in range(input.shape[0]):
    x = input[i]
    i1 = x @ torch.transpose(wih, 0, 1) + bih
    h1 = hid @ torch.transpose(whh, 0, 1) + bhh
    
    hid = tanh(i1+h1)
    
    output.append(hid)

hid = hid.unsqueeze(0)
output = torch.stack(output, dim = 0)

In [6]:
print("Pytorch results: ")
print(f'out: {out}, \n\nout shape: {out.shape}, \n\nhx: {hx}, \n\nhx shape: {hx.shape}')
print()

print("My results: ")
print(f'out: {output}, \n\nout shape: {output.shape}, \n\nhx: {hid}, \n\nhx shape: {hid.shape}')

Pytorch results: 
out: tensor([[[ 0.0037,  0.1530,  0.7012, -0.1402,  0.1944]],

        [[ 0.3124, -0.1422,  0.5204, -0.2674,  0.0915]]],
       grad_fn=<StackBackward>), 

out shape: torch.Size([2, 1, 5]), 

hx: tensor([[[ 0.3124, -0.1422,  0.5204, -0.2674,  0.0915]]],
       grad_fn=<StackBackward>), 

hx shape: torch.Size([1, 1, 5])

My results: 
out: tensor([[[ 0.0037,  0.1530,  0.7012, -0.1402,  0.1944]],

        [[ 0.3124, -0.1422,  0.5204, -0.2674,  0.0915]]],
       grad_fn=<StackBackward>), 

out shape: torch.Size([2, 1, 5]), 

hx: tensor([[[ 0.3124, -0.1422,  0.5204, -0.2674,  0.0915]]],
       grad_fn=<UnsqueezeBackward0>), 

hx shape: torch.Size([1, 1, 5])


## **BIDIRECTION** Vanilla RNN

In [219]:
# torch.manual_seed(1)
n_in, n_out = 3, 5

inpt = torch.randn([6])

inpt = inpt.view(2, 1, 3)

print(f'inpt: {inpt}, inpt shape: {inpt.shape}')

vanilla_rnn = nn.RNN(n_in, n_out,bidirectional = True)
out, hx = vanilla_rnn(inpt)

print(f'out: {out}, \n\nout shape: {out.shape}, \n\nhx: {hx}, \n\nhx shape: {hx.shape}')

inpt: tensor([[[ 0.9042,  0.1181,  1.8941]],

        [[-0.4229,  0.7431,  0.0756]]]), inpt shape: torch.Size([2, 1, 3])
out: tensor([[[ 0.1374, -0.0571, -0.7966, -0.5038,  0.7501, -0.5088, -0.3542,
          -0.6363, -0.7504, -0.6143]],

        [[-0.8717, -0.4163, -0.7654,  0.1273,  0.3883,  0.2443,  0.1815,
           0.0180, -0.7577, -0.6236]]], grad_fn=<CatBackward>), 

out shape: torch.Size([2, 1, 10]), 

hx: tensor([[[-0.8717, -0.4163, -0.7654,  0.1273,  0.3883]],

        [[-0.5088, -0.3542, -0.6363, -0.7504, -0.6143]]],
       grad_fn=<StackBackward>), 

hx shape: torch.Size([2, 1, 5])


In [220]:
for name, param in vanilla_rnn.named_parameters():
    print(name, param, param.shape)
    print()

weight_ih_l0 Parameter containing:
tensor([[ 0.2051, -0.3830,  0.3263],
        [ 0.1340, -0.0514,  0.2051],
        [-0.3184, -0.0978,  0.0044],
        [-0.2584, -0.1143, -0.1929],
        [ 0.4262,  0.3799,  0.3857]], requires_grad=True) torch.Size([5, 3])

weight_hh_l0 Parameter containing:
tensor([[ 0.3655, -0.4283,  0.1496,  0.1648, -0.3280],
        [ 0.1813, -0.2427,  0.0711,  0.1481,  0.3859],
        [-0.2346, -0.3519,  0.4138,  0.1877,  0.2174],
        [ 0.1610, -0.1583,  0.2690, -0.3904,  0.0622],
        [ 0.2022,  0.3056, -0.2233, -0.3023,  0.1676]], requires_grad=True) torch.Size([5, 5])

bias_ih_l0 Parameter containing:
tensor([-0.3552, -0.3717, -0.4022, -0.2169, -0.0854], requires_grad=True) torch.Size([5])

bias_hh_l0 Parameter containing:
tensor([-0.2648, -0.1891, -0.3960,  0.2749, -0.1024], requires_grad=True) torch.Size([5])

weight_ih_l0_reverse Parameter containing:
tensor([[-0.2822,  0.1164, -0.3344],
        [ 0.3130,  0.2572, -0.4395],
        [-0.3510, -0.29

In [221]:
wih = vanilla_rnn.weight_ih_l0
whh = vanilla_rnn.weight_hh_l0
bih = vanilla_rnn.bias_ih_l0
bhh = vanilla_rnn.bias_hh_l0

wihr = vanilla_rnn.weight_ih_l0_reverse
whhr = vanilla_rnn.weight_hh_l0_reverse
bihr = vanilla_rnn.bias_ih_l0_reverse
bhhr = vanilla_rnn.bias_hh_l0_reverse

input = inpt

In [223]:
with torch.no_grad():
    for i in range(hx.shape[0]):
        print(hx[i])
print("--------------------------------")
print("--------------------------------")

tanh = nn.Tanh()
output = []
hid_forward = torch.zeros(1, 5, dtype = torch.float32)
hid_reverse = torch.zeros(1, 5, dtype = torch.float32)

# Forward
for i in range(input.shape[0]):
    x = input[i]
    
    i_forward = x @ torch.transpose(wih, 0, 1) + bih
    h_forward = hid_forward @ torch.transpose(whh, 0, 1) + bhh
    hid_forward = tanh(i_forward + h_forward)
    

# Reverse
reverse_input = torch.flip(input, (0, 1))
    
for i in range(reverse_input.shape[0]):
    x = reverse_input[i]
    
    i_reverse = x @ torch.transpose(wihr, 0, 1) + bihr
    h_reverse = hid_reverse @ torch.transpose(whhr, 0, 1) + bhhr
    hid_reverse = tanh(i_reverse + h_reverse)
    
print(hid_forward)
print(hid_reverse)
print("-----------")

tensor([[-0.8717, -0.4163, -0.7654,  0.1273,  0.3883]], requires_grad=True)
tensor([[-0.5088, -0.3542, -0.6363, -0.7504, -0.6143]], requires_grad=True)
--------------------------------
--------------------------------
tensor([[-0.8717, -0.4163, -0.7654,  0.1273,  0.3883]], grad_fn=<TanhBackward>)
tensor([[-0.5088, -0.3542, -0.6363, -0.7504, -0.6143]], grad_fn=<TanhBackward>)
-----------


# **Multi-layer** RNN

In [15]:
torch.manual_seed(1)

n_in, n_out = 3, 5
n_layers = 2
inpt = torch.tensor([1,2,3,4,5,6], dtype = torch.float32)

inpt = inpt.view(2, 1, 3)

bidirectional_rnn = nn.RNN(n_in, n_out, n_layers)

out, hx = bidirectional_rnn(inpt)

print(f'out: {out}\n\nout shape: {out.shape}\n\nhx: {hx}\n\nhx shape: {hx.shape}')

out: tensor([[[-0.0775,  0.1719,  0.4814,  0.5731, -0.0162]],

        [[-0.2183,  0.0993,  0.7477,  0.5326, -0.0862]]],
       grad_fn=<StackBackward>)

out shape: torch.Size([2, 1, 5])

hx: tensor([[[ 0.0374, -0.0655,  0.9111,  0.6196, -0.6483]],

        [[-0.2183,  0.0993,  0.7477,  0.5326, -0.0862]]],
       grad_fn=<StackBackward>)

hx shape: torch.Size([2, 1, 5])


In [18]:
for name, param in bidirectional_rnn.named_parameters():
    print(name, param, param.shape)
    print()

weight_ih_l0 Parameter containing:
tensor([[ 0.2304, -0.1974, -0.0867],
        [ 0.2099, -0.4210,  0.2682],
        [-0.0920,  0.2275,  0.0622],
        [-0.0548,  0.1240,  0.0221],
        [ 0.1633, -0.1743, -0.0326]], requires_grad=True) torch.Size([5, 3])

weight_hh_l0 Parameter containing:
tensor([[-0.0403,  0.0648, -0.0018,  0.3909,  0.1392],
        [-0.1665, -0.2701, -0.0750, -0.1929, -0.1433],
        [ 0.0214,  0.2666,  0.2431, -0.4372,  0.2772],
        [ 0.1249,  0.4242,  0.2952, -0.4075, -0.4252],
        [-0.2157,  0.3927, -0.0745,  0.1914, -0.2078]], requires_grad=True) torch.Size([5, 5])

bias_ih_l0 Parameter containing:
tensor([ 0.4388, -0.1892,  0.3354,  0.0053, -0.2356], requires_grad=True) torch.Size([5])

bias_hh_l0 Parameter containing:
tensor([ 0.2299, -0.2374,  0.1315, -0.1291, -0.0490], requires_grad=True) torch.Size([5])

weight_ih_l1 Parameter containing:
tensor([[-0.4299, -0.2132,  0.2427, -0.1087,  0.4454],
        [ 0.3585, -0.0209, -0.2985,  0.2723,  0.13

In [30]:
wih_l0 = bidirectional_rnn.weight_ih_l0
wih_l1 = bidirectional_rnn.weight_ih_l1

whh_l0 = bidirectional_rnn.weight_hh_l0
whh_l1 = bidirectional_rnn.weight_hh_l1

bih_l0 = bidirectional_rnn.bias_ih_l0
bih_l1 = bidirectional_rnn.bias_ih_l1

bhh_l0 = bidirectional_rnn.bias_hh_l0
bhh_l1 = bidirectional_rnn.bias_hh_l1

In [58]:
with torch.no_grad():
    print(f'out: {out} {out.shape}')
#     print(f'nhx: {hx} {hx.shape}')
print("--------------------------------------------------------")
print("--------------------------------------------------------")

input = inpt

tanh = nn.Tanh()
hid = torch.zeros(1, n_out, dtype = torch.float32)
output = []


for i in range(input.shape[0]):
    x = input[i]
    
    i = x @ torch.transpose(wih_l0, 0, 1) + bih_l0
    h = hid @ torch.transpose(whh_l0, 0, 1) + bhh_l0
    
    hid = tanh(i + h)
    
    output.append(hid)

output = torch.stack(output, dim = 0)

final_output = []
hid = torch.zeros(1, n_out, dtype = torch.float32)
for i in range(output.shape[0]):
    x = output[i]
    
    i = x @ torch.transpose(wih_l1, 0, 1) + bih_l1
    h = hid @ torch.transpose(whh_l1, 0, 1) + bhh_l1
    
    hid = tanh(i + h)
    
    final_output.append(hid)

final_output = torch.stack(final_output, dim = 0)
print(f'final output: {final_output}')

out: tensor([[[-0.0775,  0.1719,  0.4814,  0.5731, -0.0162]],

        [[-0.2183,  0.0993,  0.7477,  0.5326, -0.0862]]],
       grad_fn=<StackBackward>) torch.Size([2, 1, 5])
--------------------------------------------------------
--------------------------------------------------------
final output: tensor([[[-0.0775,  0.1719,  0.4814,  0.5731, -0.0162]],

        [[-0.2183,  0.0993,  0.7477,  0.5326, -0.0862]]],
       grad_fn=<StackBackward>)
