<a href="https://colab.research.google.com/github/Jhansipothabattula/Machine_Learning/blob/main/Day61.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction to Sequence Modeling and RNNs

**Objective**

- Preprocess a text dataset for use in RNNs and setup an environment in Tensorflow or PyTorch for building RNNs

In [3]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import pad_sequences
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

vocab_size = 10000
max_len = 200

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

X_train = pad_sequences(X_train, maxlen = max_len, padding="post")
X_test = pad_sequences(X_test, maxlen = max_len, padding ="post")

print(f"Training Data shape: {X_train.shape}")
print(f"Testing Data shape: {X_test.shape}")

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128),
    SimpleRNN(128, activation="tanh", return_sequences=False),
    Dense(units=1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

model.summary()

history = model.fit(X_train, y_train, epochs = 5, batch_size = 32, validation_split = 0.2)

loss, accuracy = model.evaluate(X_test, y_test)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training Data shape: (25000, 200)
Testing Data shape: (25000, 200)


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 102ms/step - accuracy: 0.5142 - loss: 0.6990 - val_accuracy: 0.5116 - val_loss: 0.6927
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 101ms/step - accuracy: 0.5367 - loss: 0.6874 - val_accuracy: 0.5258 - val_loss: 0.6826
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 99ms/step - accuracy: 0.5900 - loss: 0.6556 - val_accuracy: 0.5396 - val_loss: 0.6862
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 98ms/step - accuracy: 0.6045 - loss: 0.6241 - val_accuracy: 0.5428 - val_loss: 0.6871
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 102ms/step - accuracy: 0.6128 - loss: 0.5966 - val_accuracy: 0.5470 - val_loss: 0.6974
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 26ms/step - accuracy: 0.5461 - loss: 0.7018
Test Loss: 0.7038
Test Accuracy: 0.5416


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

vocab_size =10000
max_len = 200

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

X_train = pad_sequences(X_train, maxlen= max_len, padding="post")
X_test = pad_sequences(X_test, maxlen=max_len, padding="post")

train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

class RNNModel(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
    super(RNNModel, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    embedded = self.embedding(x)
    output, hidden = self.rnn(embedded)
    return torch.sigmoid(self.fc(hidden.squeeze(0)))

model = RNNModel(vocab_size = 10000, embedding_dim = 128, hidden_dim = 128, output_dim = 1)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_rnn(model, train_loader, criterion, optimizer, epochs=5):
  model.train()
  for epoch in range(epochs):
    epoch_loss = 0.0
    for X_batch, y_batch in train_loader:
      optimizer.zero_grad()
      predictions = model(X_batch).squeeze(1)
      loss = criterion(predictions, y_batch.float())
      loss.backward()
      optimizer.step()
      epoch_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {epoch_loss/len(train_loader):.4f}")

train_rnn(model, train_loader, criterion, optimizer)

def evaluate_rnn(model, X_test, y_test):
  model.eval()
  with torch.no_grad():
    predictions = model(torch.tensor(X_test)).squeeze(1)
    loss = criterion(predictions, torch.tensor(y_test).float())
    accuracy = ((predictions >= 0.5) == torch.tensor(y_test)).float().mean().item()
  print(f"Test Loss: {loss.item():.4f}")
  print(f"Test Accuracy: {accuracy.item():.4f}")

evaluate_rnn(model, X_test, y_test)


Epoch 1, Loss: 0.6851
Epoch 2, Loss: 0.6512
Epoch 3, Loss: 0.6396
Epoch 4, Loss: 0.5954
Epoch 5, Loss: 0.5600
