In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
%config Completer.use_jedi = False

In [11]:
sentences = ["This is cool", "Movie sucks"]

PAD_TOKEN = "<PAD>"


def build_vocab(sentences, pad_token=PAD_TOKEN):
    """Build word<->index lookup tables from whitespace-tokenised sentences.

    Index 0 is reserved for ``pad_token``; every other word receives the next
    free index in order of first appearance.

    Args:
        sentences: iterable of strings.
        pad_token: token stored at index 0.

    Returns:
        (word_to_idx, idx_to_word) dictionaries that are inverses of each other.
    """
    word_to_idx = {pad_token: 0}
    idx_to_word = {0: pad_token}
    for sent in sentences:
        # split() without an argument collapses runs of whitespace, so a
        # double space cannot introduce an empty-string "word".
        for word in sent.split():
            if word not in word_to_idx:
                idx = len(word_to_idx)
                word_to_idx[word] = idx
                idx_to_word[idx] = word
    return word_to_idx, idx_to_word


def encode_and_pad(sentences, word_to_idx, pad_token=PAD_TOKEN):
    """Convert sentences to index lists, right-padded to a common length.

    The target length is the longest sentence in ``sentences`` rather than a
    fixed constant, so the result is always rectangular.

    Args:
        sentences: iterable of strings whose words are all in ``word_to_idx``.
        word_to_idx: mapping from word to integer index.
        pad_token: key in ``word_to_idx`` whose index is used for padding.

    Returns:
        List of equal-length lists of integer indices.

    Raises:
        KeyError: if a word (or ``pad_token``) is missing from ``word_to_idx``.
    """
    tokenized = [sent.split() for sent in sentences]
    max_len = max((len(tokens) for tokens in tokenized), default=0)
    pad_idx = word_to_idx[pad_token]
    return [
        [word_to_idx[word] for word in tokens] + [pad_idx] * (max_len - len(tokens))
        for tokens in tokenized
    ]


word_to_idx, idx_to_word = build_vocab(sentences)

print(word_to_idx)
print()
print(idx_to_word)

vectors = encode_and_pad(sentences, word_to_idx)

print(vectors)

# Explicit int64: embedding lookups expect integer indices, and this avoids
# depending on the platform's default numpy integer width.
x = torch.tensor(vectors, dtype=torch.long)

print(x)
print(x.shape)

{'<PAD>': 0, 'This': 1, 'is': 2, 'cool': 3, 'Movie': 4, 'sucks': 5}

{0: '<PAD>', 1: 'This', 2: 'is', 3: 'cool', 4: 'Movie', 5: 'sucks'}
[[1, 2, 3], [4, 5, 0]]
tensor([[1, 2, 3],
        [4, 5, 0]])
torch.Size([2, 3])


In [28]:
# Seed first so the randomly initialised embedding table is reproducible.
torch.manual_seed(1)

emb_dim = 4
rnn_hidden_size = 5

# One embedding row per vocabulary entry; index 0 is the <PAD> token and is
# registered as padding_idx.
emb = nn.Embedding(
    num_embeddings=len(word_to_idx),
    embedding_dim=emb_dim,
    padding_idx=0,
)
x_emb = emb(x)

print("X after passing through embedding layer:", x_emb, x_emb.shape, sep="\n")

X after passing through embedding layer:
tensor([[[-0.1002, -0.6092, -0.9798, -1.6091],
         [ 0.4391,  1.1712,  1.7674, -0.0954],
         [ 0.1394, -1.5785, -0.3206, -0.2993]],

        [[-0.7984,  0.3357,  0.2753,  1.7163],
         [-0.0561,  0.9107, -1.3924,  2.6891],
         [ 0.0000,  0.0000,  0.0000,  0.0000]]], grad_fn=<EmbeddingBackward>)
torch.Size([2, 3, 4])


In [46]:
# Seed so the GRU's random initial weights are reproducible.
torch.manual_seed(1)
rnn = nn.GRU(emb_dim, rnn_hidden_size, batch_first=True, bias=False)
print("RNN weights: ")
print(rnn.weight_ih_l0)

print()
print("Individual weight matrices:")
# weight_ih_l0 stacks the three input-to-hidden gate matrices row-wise in the
# order reset (r), update (z), new/candidate (n); each gate owns `hidden_size`
# rows. Derive the slice bounds from the module instead of hard-coding 5/10/15
# so the split stays correct if the hidden size changes.
gate_rows = rnn.hidden_size
wih_r = rnn.weight_ih_l0[0:gate_rows, :]
wih_z = rnn.weight_ih_l0[gate_rows:2 * gate_rows, :]
wih_n = rnn.weight_ih_l0[2 * gate_rows:3 * gate_rows, :]
print(wih_r)
print()
print(wih_z)
print()
print(wih_n)

RNN weights: 
Parameter containing:
tensor([[ 0.2304, -0.1974, -0.0867,  0.2099],
        [-0.4210,  0.2682, -0.0920,  0.2275],
        [ 0.0622, -0.0548,  0.1240,  0.0221],
        [ 0.1633, -0.1743, -0.0326, -0.0403],
        [ 0.0648, -0.0018,  0.3909,  0.1392],
        [-0.1665, -0.2701, -0.0750, -0.1929],
        [-0.1433,  0.0214,  0.2666,  0.2431],
        [-0.4372,  0.2772,  0.1249,  0.4242],
        [ 0.2952, -0.4075, -0.4252, -0.2157],
        [ 0.3927, -0.0745,  0.1914, -0.2078],
        [ 0.4388, -0.1892,  0.3354,  0.0053],
        [-0.2356,  0.2299, -0.2374,  0.1315],
        [-0.1291, -0.0490, -0.4299, -0.2132],
        [ 0.2427, -0.1087,  0.4454,  0.3585],
        [-0.0209, -0.2985,  0.2723,  0.1388]], requires_grad=True)

Individual weight matrices:
tensor([[ 0.2304, -0.1974, -0.0867,  0.2099],
        [-0.4210,  0.2682, -0.0920,  0.2275],
        [ 0.0622, -0.0548,  0.1240,  0.0221],
        [ 0.1633, -0.1743, -0.0326, -0.0403],
        [ 0.0648, -0.0018,  0.3909,  0.1

In [48]:
print("RNN weights:")
print(rnn.weight_hh_l0)

print()
print("Individual weight matrices:")
# weight_hh_l0 stacks the three hidden-to-hidden gate matrices row-wise in the
# order reset (r), update (z), new/candidate (n); each gate owns `hidden_size`
# rows. Derive the slice bounds from the module instead of hard-coding 5/10/15
# so the split stays correct if the hidden size changes.
gate_rows = rnn.hidden_size
whh_r = rnn.weight_hh_l0[0:gate_rows, :]
whh_z = rnn.weight_hh_l0[gate_rows:2 * gate_rows, :]
whh_n = rnn.weight_hh_l0[2 * gate_rows:3 * gate_rows, :]
print(whh_r)
print()
print(whh_z)
print()
print(whh_n)

RNN weights:
Parameter containing:
tensor([[-0.2891,  0.2905,  0.2715,  0.3966, -0.2507],
        [-0.0736, -0.0087,  0.0653, -0.3394, -0.3174],
        [ 0.2433, -0.1049,  0.2185,  0.0255,  0.1468],
        [ 0.0983,  0.1626,  0.2217, -0.4142,  0.2251],
        [-0.3144, -0.3374,  0.0272, -0.0762,  0.2627],
        [-0.2590, -0.3976,  0.3255, -0.0663,  0.2515],
        [ 0.1438, -0.3354,  0.0898,  0.1074, -0.2994],
        [-0.2122,  0.1525,  0.0801, -0.1902, -0.1354],
        [ 0.4096, -0.0827,  0.2521,  0.1936, -0.2891],
        [-0.3803,  0.4293,  0.0234,  0.3065,  0.0927],
        [ 0.1438,  0.3341,  0.4241, -0.2968,  0.0559],
        [ 0.3337,  0.3240,  0.2778, -0.3237, -0.3220],
        [-0.2705,  0.0562,  0.4457, -0.2825,  0.2383],
        [-0.2475, -0.4205, -0.0951,  0.2577,  0.4152],
        [-0.2777,  0.0971,  0.3859,  0.2963,  0.2787]], requires_grad=True)

Individual weight matrices:
tensor([[-0.2891,  0.2905,  0.2715,  0.3966, -0.2507],
        [-0.0736, -0.0087,  0.0653,

In [67]:
# Reference forward pass through the built-in GRU: `out` receives the first
# returned value (printed below) and `hx` the second.
out, hx = rnn(x_emb)
print("Result of Pytorch:", out, sep="\n")

Result of Pytorch:
tensor([[[-0.0941, -0.0626,  0.4790, -0.2041, -0.1372],
         [ 0.2963, -0.1015,  0.0473,  0.3765, -0.0092],
         [ 0.2581, -0.2344,  0.1297,  0.2794,  0.1541]],

        [[-0.1746,  0.1412, -0.0872,  0.3289,  0.1479],
         [-0.4669,  0.3670, -0.0742,  0.2993, -0.0806],
         [-0.2444,  0.1372, -0.0368,  0.1408, -0.0089]]],
       grad_fn=<TransposeBackward1>)


In [78]:
# Hand-rolled GRU forward pass (no bias terms) using the gate matrices sliced
# out of the PyTorch module.

# Move the first two axes of x_emb around so x_in[i] selects one timestep
# for the whole batch.
x_in = x_emb.permute(1, 0, 2)

batch_size, seq_size, feat_size = x_emb.shape
sigmoid, tanh = nn.Sigmoid(), nn.Tanh()

# The recurrence starts from an all-zero hidden state.
hidden_t = torch.zeros((batch_size, rnn_hidden_size))
my_out = []

for i in range(seq_size):
    x_inp = x_in[i]

    # Reset gate: how much of the previous state feeds the candidate.
    r = sigmoid(x_inp @ wih_r.t() + hidden_t @ whh_r.t())

    # Update gate: how much of the previous state is carried over unchanged.
    z = sigmoid(x_inp @ wih_z.t() + hidden_t @ whh_z.t())

    # Candidate state; the reset gate scales only the hidden contribution.
    n = tanh(x_inp @ wih_n.t() + r * (hidden_t @ whh_n.t()))

    # Blend candidate and previous state with the update gate.
    hidden_t = (1 - z) * n + z * hidden_t

    my_out.append(hidden_t)

# Stack along a new leading time axis, then swap the first two axes so the
# batch axis comes first again.
my_out = torch.stack(my_out).permute(1, 0, 2)

In [84]:
def _show(title, tensor):
    """Print a blank line, then the title, the tensor and the tensor's shape."""
    print()
    print(title)
    print(tensor)
    print(tensor.shape)


# Seed before building the linear layer so its random init is reproducible.
torch.manual_seed(1)

_show("My out:", my_out)

fc = nn.Linear(rnn_hidden_size, 1)

# Collapse the leading axes so each row holds one hidden-state vector of
# length rnn_hidden_size.
my_out_for_lin = my_out.contiguous().view(-1, rnn_hidden_size)
_show("My out for linear layer:", my_out_for_lin)

my_out_from_lin = fc(my_out_for_lin)
_show("My out from linear layer:", my_out_from_lin)

# Regroup the per-row scalars so there is one row per batch element.
my_out_semi_final = my_out_from_lin.view(batch_size, -1)
_show("Reshape my_out_from_lin to be batch_first:", my_out_semi_final)

# Keep only the last column of each row.
final_out = my_out_semi_final[:, -1]
_show("Final out:", final_out)


My out:
tensor([[[-0.0941, -0.0626,  0.4790, -0.2041, -0.1372],
         [ 0.2963, -0.1015,  0.0473,  0.3765, -0.0092],
         [ 0.2581, -0.2344,  0.1297,  0.2794,  0.1541]],

        [[-0.1746,  0.1412, -0.0872,  0.3289,  0.1479],
         [-0.4669,  0.3670, -0.0742,  0.2993, -0.0806],
         [-0.2444,  0.1372, -0.0368,  0.1408, -0.0089]]],
       grad_fn=<PermuteBackward>)
torch.Size([2, 3, 5])

My out for linear layer:
tensor([[-0.0941, -0.0626,  0.4790, -0.2041, -0.1372],
        [ 0.2963, -0.1015,  0.0473,  0.3765, -0.0092],
        [ 0.2581, -0.2344,  0.1297,  0.2794,  0.1541],
        [-0.1746,  0.1412, -0.0872,  0.3289,  0.1479],
        [-0.4669,  0.3670, -0.0742,  0.2993, -0.0806],
        [-0.2444,  0.1372, -0.0368,  0.1408, -0.0089]], grad_fn=<ViewBackward>)
torch.Size([6, 5])

My out from linear layer:
tensor([[0.2322],
        [0.4353],
        [0.3565],
        [0.2144],
        [0.1914],
        [0.2213]], grad_fn=<AddmmBackward>)
torch.Size([6, 1])

Reshape my_out