In [None]:
import os
import re
import pickle
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

```
torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first, dropout, bidirectional)
```
1.input_size: embedding_dim  
2.hidden_size: number of features in the hidden state h (i.e. the number of LSTM units per layer)  
3.num_layers: number of layers in RNN  
4.batch_first: default False ([seq_len, batch, feature]); True ([batch, seq_len, feature])  
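5.dropout: default 0. If non-zero, dropout is applied to the outputs of every LSTM layer except the last, randomly zeroing activations with this probability during training to reduce overfitting (it has no effect when num_layers=1).  
6.bidirectional: default False. If True, the LSTM runs in both directions and num_directions = 2.  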
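Inputs:  
The input data and, optionally, the initial hidden state h_0 and initial cell state c_0 (both default to zeros if omitted). E.g.  
```
output, (h_n, c_n) = lstm(input, (h_0, c_0))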
```

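1.output: (seq_len, batch, num_directions * hidden_size) when batch_first=False; (batch, seq_len, num_directions * hidden_size) when batch_first=True  
2.h_n: final hidden state, (num_layers * num_directions, batch, hidden_size) regardless of batch_first  
3.c_n: final cell state, (num_layers * num_directions, batch, hidden_size) regardless of batch_first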

**Hyperparameters**

In [None]:
# Shape of one fake mini-batch: number of sequences and tokens per sequence.
batch_size, seq_len = 10, 20
# Embedding table: number of distinct token ids and size of each token vector.
vocab_size, embedding_dim = 100, 30
# LSTM: features in the hidden state and number of stacked layers.
hidden_size, num_layer = 18, 1

**Fake Data**

In [None]:
# Random token ids in [0, vocab_size) with shape (batch_size, seq_len); stands in for a tokenized batch.
# The upper bound is tied to vocab_size (instead of a hard-coded 100) so every id stays a valid
# index into the embedding table if the hyperparameter is changed.
# NOTE: `input` shadows the Python builtin; the name is kept because later cells reference it.
input = torch.randint(low=0, high=vocab_size, size=[batch_size, seq_len])
input

tensor([[37, 26, 97, 69, 92, 21, 90, 93, 39, 79, 71, 43, 65, 57, 92, 11,  0, 10,
         51,  1],
        [56, 71, 56, 83,  6, 56,  1, 26, 64, 95, 20, 66, 10, 16, 71, 19, 20, 75,
         25, 63],
        [93, 85, 26, 28, 64, 34, 69, 52, 92, 30, 48, 24, 60, 43, 77, 47, 47, 47,
         65, 31],
        [13, 63, 81, 21, 58, 52, 71, 30, 40, 70, 82, 66, 11, 47,  7, 35, 40, 71,
         78, 78],
        [53, 68, 83, 14, 88, 93, 71, 13, 72, 14, 77,  1, 11, 29, 75,  3, 87, 67,
         55, 33],
        [90, 21,  0,  1, 65, 31, 28, 38, 27, 89, 82, 70, 65, 38, 58, 59, 31, 73,
         75, 48],
        [83, 93, 12, 74,  0, 49, 90, 24, 89, 67, 20,  8, 17, 95, 75, 72, 60, 94,
         95, 50],
        [88, 29, 99, 88, 22, 64, 84, 20,  2, 76,  8, 45, 38, 54, 72, 51, 90, 56,
         61, 42],
        [25, 13, 19, 86, 48, 31, 33, 48, 32,  7, 86,  8, 38, 95, 50, 73, 87, 86,
         33, 81],
        [28, 93, 48, 29, 91, 65, 56, 62, 68, 54, 71,  0,  2, 60, 75, 32, 74, 37,
          8,  9]])

**Embedding**

In [None]:
# Learnable lookup table: one embedding_dim-sized vector per token id in [0, vocab_size).
embedding = nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=embedding_dim,
)

In [None]:
# Replace every token id with its embedding vector: (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim).
input_embeded = embedding(input)  # (batch_size, seq_len, embedding_dim) (10, 20, 30)

**LSTM**

In [None]:
# batch_first=True makes the LSTM consume and emit (batch, seq_len, feature) tensors,
# which is the layout of the embedded input built above.
lstm = nn.LSTM(
    input_size=embedding_dim,
    hidden_size=hidden_size,
    num_layers=num_layer,
    batch_first=True,
)

In [None]:
# No initial (h_0, c_0) is passed, so PyTorch starts both states from zeros.
# output: last-layer hidden state at every time step; h_n / c_n: final hidden / cell state.
output, (h_n, c_n) = lstm(input_embeded)

In [None]:
print(output.size())  # batch_first=True -> (batch_size, seq_len, num_directions * hidden_size) = (10, 20, 18)

torch.Size([10, 20, 18])


In [None]:
print(h_n.size()) # (num_directions * num_layers, batch_size, hidden_size) = (1, 10, 18); not affected by batch_first

torch.Size([1, 10, 18])


In [None]:
print(c_n.size()) # (num_directions * num_layers, batch_size, hidden_size) = (1, 10, 18); not affected by batch_first

torch.Size([1, 10, 18])
