<a href="https://colab.research.google.com/github/HarshavardhanK/machine-learning-book/blob/main/RNN_Raschka.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn

In [2]:
!pip install 'portalocker>=2.0.0'

Collecting portalocker>=2.0.0
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Installing collected packages: portalocker
Successfully installed portalocker-2.8.2


In [3]:
torch.manual_seed(1)

<torch._C.Generator at 0x7db8b4281290>

In [4]:
rnn_layer = nn.RNN(input_size=5, hidden_size=3, num_layers=1, batch_first=True)

In [5]:
w_xh = rnn_layer.weight_ih_l0
w_hh = rnn_layer.weight_hh_l0
b_xh = rnn_layer.bias_ih_l0
b_hh = rnn_layer.bias_hh_l0

In [6]:
print("w_xh shape: ", w_xh.shape)
print("w_hh shape: ", w_hh.shape)
print("b_xh shape: ", b_xh.shape)
print("b_hh shape: ", b_xh.shape)

w_xh shape:  torch.Size([3, 5])
w_hh shape:  torch.Size([3, 3])
b_xh shape:  torch.Size([3])
b_hh shape:  torch.Size([3])


In [7]:
x_seq = torch.tensor([[1.0] * 5, [2.0] * 5, [3.0] * 5, [4.0] * 5]).float()
x_seq.shape

torch.Size([4, 5])

In [8]:
output, hnn = rnn_layer(torch.reshape(x_seq, (1, 4, 5)))
hnn

tensor([[[-0.8190,  0.8263,  0.8389]]], grad_fn=<StackBackward0>)

In [9]:
out_man = []

for t in range(4):
  xt = torch.reshape(x_seq[t], (1, 5))
  print(xt.shape)
  print("Time step {t} => ")
  print('   Input     ', xt.numpy())
  print('W_xh shape: ', w_xh.shape)
  print("Transpose shape: ", torch.transpose(w_xh, 0, 1).shape)

  ht = torch.matmul(xt, torch.transpose(w_xh, 0, 1)) + b_xh
  print("ht shape: ", ht.shape)

  print('     Hidden      :', ht.detach().numpy())

  if t > 0:
    prev_h = out_man[t - 1]

  else:
    prev_h = torch.zeros((ht.shape))


  ot = ht + torch.matmul(prev_h, torch.transpose(w_hh, 0, 1)) + b_hh

  ot = torch.tanh(ot)
  out_man.append(ot)

  print("   Output (manual)    ", ot.detach().numpy())
  print("   RNN output         ", output[:, t].detach().numpy())

torch.Size([1, 5])
Time step {t} => 
   Input      [[1. 1. 1. 1. 1.]]
W_xh shape:  torch.Size([3, 5])
Transpose shape:  torch.Size([5, 3])
ht shape:  torch.Size([1, 3])
     Hidden      : [[-0.52682793  0.55840755  0.47656733]]
   Output (manual)     [[-0.20985235 -0.00598161  0.6828735 ]]
   RNN output          [[-0.20985247 -0.00598161  0.6828735 ]]
torch.Size([1, 5])
Time step {t} => 
   Input      [[2. 2. 2. 2. 2.]]
W_xh shape:  torch.Size([3, 5])
Transpose shape:  torch.Size([5, 3])
ht shape:  torch.Size([1, 3])
     Hidden      : [[-0.8686449   1.0891707   0.60896015]]
   Output (manual)     [[-0.49751765  0.2645275   0.7016616 ]]
   RNN output          [[-0.49751776  0.2645275   0.70166165]]
torch.Size([1, 5])
Time step {t} => 
   Input      [[3. 3. 3. 3. 3.]]
W_xh shape:  torch.Size([3, 5])
Transpose shape:  torch.Size([5, 3])
ht shape:  torch.Size([1, 3])
     Hidden      : [[-1.210462   1.6199336  0.741353 ]]
   Output (manual)     [[-0.69121206  0.60507196  0.7903675 ]]
   R

In [10]:
from torchtext.datasets import IMDB
from torchtext.vocab import vocab
from torch.utils.data.dataset import random_split

In [11]:
train_dataset = IMDB(split='train')
test_dataset = IMDB(split='test')

In [12]:
torch.manual_seed(1)

<torch._C.Generator at 0x7db8b4281290>

In [13]:
train_dataset, validation_dataset = random_split(list(train_dataset), [20000, 5000])

In [14]:
import re
from collections import Counter, OrderedDict

token_counts = Counter()

def tokenizer(text):
    text = re.sub('<[^>]*>', '', text)
    emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text.lower())
    text = re.sub('[\W]+', ' ', text.lower()) +\
        ' '.join(emoticons).replace('-', '')
    tokenized = text.split()
    return tokenized

for label, line in train_dataset:
    tokens = tokenizer(line)
    token_counts.update(tokens)


print('Vocab-size:', len(token_counts))

Vocab-size: 69023


In [15]:
sorted_by_freq_tuples = sorted(token_counts.items(), key=lambda x: x[1], reverse=True)
ordered_dict = OrderedDict(sorted_by_freq_tuples)

vocab = vocab(ordered_dict)

vocab.insert_token("<pad>", 0)
vocab.insert_token("<unk>", 1)
vocab.set_default_index(1)

print([vocab[token] for token in ['this', 'is', 'an', 'example']])

[11, 7, 35, 457]


In [16]:
text_pipeline = lambda x : [vocab[token] for token in tokenizer(x)]
label_pipeline = lambda x: 1. if x == "pos" else 0

In [67]:
device = torch.device(torch.device('cuda') if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [100]:
def collate_batch(batch):
    label_list, text_list, lengths = [], [], []

    for _label, _text in batch:

        label_list.append(label_pipeline(_label))
        processed_text = torch.tensor(text_pipeline(_text))

        text_list.append(processed_text)
        lengths.append(processed_text.size(0))

    label_list = torch.tensor(label_list)
    lengths = torch.tensor(lengths)
    padded_text_list = nn.utils.rnn.pad_sequence(text_list, batch_first=True)

    return padded_text_list.to(device), label_list.to(device), lengths.to(device)


In [101]:
from torch.utils.data import DataLoader

In [102]:
train_dl = DataLoader(train_dataset, batch_size=32, collate_fn=collate_batch, shuffle=True)
valid_dl = DataLoader(validation_dataset, batch_size=32, collate_fn=collate_batch, shuffle=False)
test_dl = DataLoader(test_dataset, batch_size=32, collate_fn=collate_batch, shuffle=False)

In [103]:
## Create dummy embeddings
embed = nn.Embedding(num_embeddings=10, embedding_dim=4, padding_idx=0) #providing padding index will not affect the 0th index during gradient update
text_encoded_input = torch.LongTensor([[1,1,1,1], [1,1,1,1]])
print(embed(text_encoded_input))

tensor([[[-1.1660, -1.0748,  1.3168, -0.6818],
         [-1.1660, -1.0748,  1.3168, -0.6818],
         [-1.1660, -1.0748,  1.3168, -0.6818],
         [-1.1660, -1.0748,  1.3168, -0.6818]],

        [[-1.1660, -1.0748,  1.3168, -0.6818],
         [-1.1660, -1.0748,  1.3168, -0.6818],
         [-1.1660, -1.0748,  1.3168, -0.6818],
         [-1.1660, -1.0748,  1.3168, -0.6818]]], grad_fn=<EmbeddingBackward0>)


In [104]:
class Dummy_RNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=2, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        _, hidden = self.rnn(x)
        out = hidden[-1, :, :]
        out = self.fc(out)

        return out

In [105]:
model = Dummy_RNN(64, 32)
print(model)

Dummy_RNN(
  (rnn): RNN(64, 32, num_layers=2, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)


In [106]:
model(torch.randn(2, 3, 64))

tensor([[-0.1765],
        [-0.4967]], grad_fn=<AddmmBackward0>)

In [107]:
## Take a small batch

from torch.utils.data import DataLoader
dataloader = DataLoader(train_dataset, batch_size=4, shuffle=False, collate_fn=collate_batch)
text_batch, label_batch, length_batch = next(iter(dataloader))
print(text_batch)
print(label_batch)
print(length_batch)
print(text_batch.shape)

tensor([[   35,  1739,     7,   449,   721,     6,   301,     4,   787,     9,
             4,    18,    44,     2,  1705,  2460,   186,    25,     7,    24,
           100,  1874,  1739,    25,     7, 34415,  3568,  1103,  7517,   787,
             5,     2,  4991, 12401,    36,     7,   148,   111,   939,     6,
         11598,     2,   172,   135,    62,    25,  3199,  1602,     3,   928,
          1500,     9,     6,  4601,     2,   155,    36,    14,   274,     4,
         42945,     9,  4991,     3,    14, 10296,    34,  3568,     8,    51,
           148,    30,     2,    58,    16,    11,  1893,   125,     6,   420,
          1214,    27, 14542,   940,    11,     7,    29,   951,    18,    17,
         15994,   459,    34,  2480, 15211,  3713,     2,   840,  3200,     9,
          3568,    13,   107,     9,   175,    94,    25,    51, 10297,  1796,
            27,   712,    16,     2,   220,    17,     4,    54,   722,   238,
           395,     2,   787,    32,    27,  5236,  

In [121]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, fc_hidden_size, rnn_hidden_size):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.LSTM = nn.LSTM(embedding_dim, rnn_hidden_size, batch_first=True)

        self.fully_connected1 = nn.Linear(rnn_hidden_size, fc_hidden_size)
        self.Relu = nn.ReLU()

        self.fully_connected2 = nn.Linear(fc_hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, text, lengths):
        out = self.embedding(text)
        out = nn.utils.rnn.pack_padded_sequence(out, lengths.cpu().numpy(), enforce_sorted=False, batch_first=True)

        out, (hidden, cell) = self.LSTM(out)
        out = hidden[-1, :, :]

        out = self.fully_connected1(out)
        out = self.Relu(out)

        out = self.fully_connected2(out)
        out = self.sigmoid(out)

        return out

In [122]:
vocab_size = len(vocab)
embed_dim = 20
rnn_hidden_size = 64
fc_hidden_size = 64

torch.manual_seed(1)

<torch._C.Generator at 0x7db8b4281290>

In [123]:
model = RNN(vocab_size=vocab_size, embedding_dim=embed_dim, rnn_hidden_size=rnn_hidden_size, fc_hidden_size=fc_hidden_size)
model = model.to(device)

In [124]:
print(model)

RNN(
  (embedding): Embedding(69025, 20, padding_idx=0)
  (LSTM): LSTM(20, 64, batch_first=True)
  (fully_connected1): Linear(in_features=64, out_features=64, bias=True)
  (Relu): ReLU()
  (fully_connected2): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [125]:
optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)
loss_fn = nn.BCELoss()

In [129]:
def train(dataloader):
    model.train()

    total_acc, total_loss = 0, 0

    for text_batch, label_batch, length in dataloader:

        optimizer.zero_grad()

        label_batch = label_batch.to(torch.float32)
        pred = model(text_batch, length)[:, 0]
        loss = loss_fn(pred, label_batch)

        loss.backward()
        optimizer.step()

        total_acc += ((pred>=0.5).float() == label_batch).float().sum().item()
        total_loss += loss.item() * label_batch.size(0)

        ln = len(dataloader.dataset)

    return total_acc / ln, total_loss / ln


In [130]:
def evaluate(dataloader):
    model.eval()

    total_acc, total_loss = 0, 0

    with torch.no_grad():
        for text_batch, label_batch, lengths in dataloader:
            label_batch = label_batch.to(torch.float32)
            pred = model(text_batch, lengths)[:, 0]
            loss = loss_fn(pred, label_batch)

            total_acc += ((pred >= 0.5).float() == label_batch).float().sum().item()
            total_loss += loss.item() * label_batch.size(0)

            ln = len(dataloader.dataset)

    return total_acc / ln, total_loss / ln

In [None]:
num_epochs = 10
torch.manual_seed(1)

for epoch in range(num_epochs):
    acc_train, loss_train = train(dataloader=train_dl)
    acc_valid, loss_valid = evaluate(dataloader=valid_dl)

    print(f'Epoch {epoch} accuracy: {acc_train: .4f} '
          f'val acc: {acc_valid: .4f}')

Epoch 0 accuracy:  0.9987 val acc:  1.0000


In [116]:
torchtext.__version__

NameError: ignored

In [None]:
import torchtext

In [None]:
from torchtext import __version__ as torchtext_version
from pkg_resources import parse_version

if parse_version(torchtext.__version__) > parse_version("0.10"):
    label_pipeline = lambda x: 1. if x == 2 else 0.         # 1 ~ negative, 2 ~ positive review
    print('sup man')
else:
    label_pipeline = lambda x: 1. if x == 'pos' else 0.
    print('this')

this
