## Embedding

In [1]:
import torch
import torch.nn as nn

# Embedding table dimensions: 8 token ids, each mapped to a 4-dimensional vector.
vocab_size, embed_dim = 8, 4

# nn.Embedding keeps one trainable row per token id in its `weight` parameter.
embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)

# Show the table's shape first, then its (randomly initialised) values.
print(embedding.weight.shape, embedding.weight, sep="\n")

torch.Size([8, 4])
Parameter containing:
tensor([[-1.0519e+00, -7.1848e-01,  1.9329e+00,  9.0842e-01],
        [-1.2014e-03, -1.2857e-01,  6.7015e-01, -1.7427e-01],
        [-5.9494e-01,  5.9013e-01, -1.2110e+00,  1.2570e+00],
        [ 1.1538e+00,  1.2611e+00,  1.0576e+00, -1.0911e+00],
        [-9.1101e-01, -1.4574e+00, -7.1536e-01,  2.9316e-01],
        [-3.7073e-01, -2.4641e+00,  1.4500e+00, -4.6410e-01],
        [-2.1130e-02,  6.0572e-01,  1.3558e+00,  1.4971e+00],
        [-6.0266e-01,  8.9815e-01,  4.8690e-01,  9.7690e-01]],
       requires_grad=True)


In [2]:
# Five token ids to look up; each id selects the matching row of embedding.weight,
# so the printed result has one row per id and embed_dim columns.
data = torch.tensor([0, 4, 7, 2, 1], dtype=torch.int64)
data_embedding = embedding(data)
print(data_embedding)

tensor([[-1.0519e+00, -7.1848e-01,  1.9329e+00,  9.0842e-01],
        [-9.1101e-01, -1.4574e+00, -7.1536e-01,  2.9316e-01],
        [-6.0266e-01,  8.9815e-01,  4.8690e-01,  9.7690e-01],
        [-5.9494e-01,  5.9013e-01, -1.2110e+00,  1.2570e+00],
        [-1.2014e-03, -1.2857e-01,  6.7015e-01, -1.7427e-01]],
       grad_fn=<EmbeddingBackward0>)


## RNN

In [4]:
# sequence_length = 5
# embed_dim = 4


import torch
import torch.nn as nn

# One hand-written sequence: five time steps (rows), each a 4-dimensional
# embedding vector (columns).
data = torch.tensor(
    [
        [-0.1882,  0.5530,  1.6267,  0.7013],
        [ 0.2293,  1.3255,  0.1318,  2.0501],
        [ 0.4309, -1.3067, -0.8823,  1.5977],
        [ 1.0281, -1.9094,  0.3182,  0.4211],
        [ 1.7840, -0.8278, -0.2701,  1.3586],
    ],
    dtype=torch.float32,
)
print(data.size())

torch.Size([5, 4])


In [6]:
# Q: does nn.RNN need sequence_length in advance?
# A: no - the constructor below only receives the per-step input width and the
#    hidden width; the number of time steps comes from the input tensor at call time.

embed_dim, hidden_dim = 4, 3
rnn = nn.RNN(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True)

In [11]:
# Shapes returned by nn.RNN when batch_first=True:
#   output: (bs, L, num_directions * hidden_dim) - the hidden state at every time step
#   hidden: (num_layers * num_directions, bs, hidden_dim) - the final hidden state.
#           Its last axis is hidden_dim (3 here), not embed_dim (4), and its
#           leading axis is not the batch axis even with batch_first=True.

data = data.reshape(1, 5, 4)  # (bs, L, embed_dim)
output, hidden = rnn(data)
print('output.shape:', output.shape) # (bs, L, hidden_dim)
print('hidden.shape:', hidden.shape) # (num_layers * num_directions, bs, hidden_dim)

# Make the documented shapes executable so a wrong comment cannot go unnoticed.
num_directions = 2 if rnn.bidirectional else 1
assert output.shape == (1, 5, num_directions * rnn.hidden_size)
assert hidden.shape == (rnn.num_layers * num_directions, 1, rnn.hidden_size)

output.shape: torch.Size([1, 5, 3])
hidden.shape: torch.Size([1, 1, 3])


In [10]:
# Last time step of `output` for every sequence in the batch. The recorded value
# is identical to hidden[-1, :, :] displayed in the next cell.
output[:, -1, :]

tensor([[-0.2511, -0.0588,  0.4034]], grad_fn=<SliceBackward0>)

In [13]:
# Final hidden state taken from the last entry along the first axis of `hidden`.
# The recorded value is identical to output[:, -1, :] displayed in the previous cell.
hidden[-1, :, :]

tensor([[-0.2511, -0.0588,  0.4034]], grad_fn=<SliceBackward0>)

In [15]:
import torchinfo

# Input shape used for the summary: one sequence of five 4-dimensional embeddings.
batch_size, sequence_length = 1, 5
embed_dim, hidden_dim = 4, 3

# Single-layer, unidirectional RNN with biases. Parameter count:
#   W_ih (3x4) + W_hh (3x3) + b_ih (3) + b_hh (3) = 27
rnn = nn.RNN(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True)

torchinfo.summary(rnn, input_size=(batch_size, sequence_length, embed_dim))

Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [1, 5, 3]                 27
Total params: 27
Trainable params: 27
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [25]:
from torchsummary import summary
import torch

embed_dim = 4
hidden_dim = 3
rnn = nn.RNN(embed_dim, hidden_dim,
             batch_first=True)

# Define the shape constants before building the input and actually use them.
# Previously they were assigned after the tensor was created, never read, and
# batch_size = 1 contradicted the real batch of 32.
batch_size = 32
sequence_length = 5
data = torch.randn(batch_size, sequence_length, embed_dim)  # (bs, L, embed_dim)

# The recorded output shows the table twice - presumably summary() prints it and
# Jupyter then echoes the returned object. The trailing semicolon suppresses the echo.
summary(rnn, data);

Layer (type:depth-idx)                   Output Shape              Param #
└─RNN: 0-1                               [-1, 5, 3]                27
Total params: 27
Trainable params: 27
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
└─RNN: 0-1                               [-1, 5, 3]                27
Total params: 27
Trainable params: 27
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [23]:
import torchinfo

# Input shape used for the summary: one sequence of five 4-dimensional embeddings.
batch_size, sequence_length = 1, 5
embed_dim, hidden_dim = 4, 3

# Same single-layer RNN, but without bias vectors. Parameter count:
#   W_ih (3x4) + W_hh (3x3) = 21
rnn = nn.RNN(
    input_size=embed_dim,
    hidden_size=hidden_dim,
    bias=False,
    batch_first=True,
)

torchinfo.summary(rnn, input_size=(batch_size, sequence_length, embed_dim))

Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [1, 5, 3]                 21
Total params: 21
Trainable params: 21
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

## Stack of RNNs

In [17]:
import torchinfo

# Input shape used for the summary: one sequence of five 4-dimensional embeddings.
batch_size, sequence_length = 1, 5
embed_dim, hidden_dim = 4, 3

# Two stacked RNN layers; the second layer consumes the first layer's hidden states.
# Parameter count:
#   layer 0: W_ih (3x4) + W_hh (3x3) + two biases (3 + 3) = 27
#   layer 1: W_ih (3x3) + W_hh (3x3) + two biases (3 + 3) = 24
#   total                                                 = 51
rnn = nn.RNN(
    input_size=embed_dim,
    hidden_size=hidden_dim,
    num_layers=2,
    batch_first=True,
)

torchinfo.summary(rnn, input_size=(batch_size, sequence_length, embed_dim))

Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [1, 5, 3]                 51
Total params: 51
Trainable params: 51
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [None]:
# Shapes of output and hidden, assuming `rnn` is the two-layer model from the
# previous cell:
#   output: (bs, L, hidden_dim)          - top layer's hidden state at every step
#   hidden: (num_layers, bs, hidden_dim) - final hidden state of each layer
# The input is one sequence of five 4-dimensional vectors, shaped (bs, L, embed_dim).
data = torch.tensor(
    [
        [-0.1882,  0.5530,  1.6267,  0.7013],
        [ 0.2293,  1.3255,  0.1318,  2.0501],
        [ 0.4309, -1.3067, -0.8823,  1.5977],
        [ 1.0281, -1.9094,  0.3182,  0.4211],
        [ 1.7840, -0.8278, -0.2701,  1.3586],
    ],
    dtype=torch.float32,
).reshape(1, 5, 4)

output, hidden = rnn(data)
print('output.shape:', output.size())
print('hidden.shape:', hidden.size())

In [None]:
# Display the per-time-step result of the forward pass above. If `rnn` is the
# two-layer model, the expected shape is (1, 5, 3) - TODO confirm once the cell is run.
output

In [None]:
# Display the final hidden state from the forward pass above. If `rnn` is the
# two-layer model, the expected shape is (2, 1, 3), i.e. (num_layers, bs, hidden_dim)
# - TODO confirm once the cell is run.
hidden

## Bidirectional RNN

In [20]:
import torchinfo

# Input shape used for the summary: one sequence of five 4-dimensional embeddings.
batch_size, sequence_length = 1, 5
embed_dim, hidden_dim = 4, 3

# Bidirectional RNN: a forward and a backward direction, each with its own weights.
# Parameter count: 2 directions x 27 = 54. The summary's last output dimension is
# 2 * hidden_dim = 6 because both directions are concatenated.
rnn = nn.RNN(
    input_size=embed_dim,
    hidden_size=hidden_dim,
    batch_first=True,
    bidirectional=True,
)

torchinfo.summary(rnn, input_size=(batch_size, sequence_length, embed_dim))

Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [1, 5, 6]                 54
Total params: 54
Trainable params: 54
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

## LSTM

In [None]:
import torchinfo
import torch.nn as nn

# Input shape used for the summary: one sequence of five 4-dimensional embeddings.
batch_size, sequence_length = 1, 5
embed_dim, hidden_dim = 4, 3

# Single-layer LSTM with the same widths as the RNN examples. An LSTM keeps four
# gate blocks per weight/bias tensor, so the expected parameter count is
# 4 x (3x4 + 3x3 + 3 + 3) = 108 - confirm against the summary once the cell is run.
lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True)

torchinfo.summary(lstm, input_size=(batch_size, sequence_length, embed_dim))