# Recurrent Neural Network (RNN) for Text Classification
This notebook demonstrates a basic RNN model using PyTorch to classify IMDB movie reviews as positive or negative.

In [18]:
# Import required PyTorch modules
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence


In [19]:
# Manually created small dataset with labeled sentiment examples
texts = [
    "good movie",
    "bad acting",
    "awesome film",
    "terrible movie",
    "loved it",
    "hated it"
]
labels = [1, 0, 1, 0, 1, 0]  # 1 = positive, 0 = negative


In [20]:
# Simple tokenizer that splits text into lowercase words
tokenizer = lambda x: x.lower().split()
# Build vocabulary: assign a unique ID to each word
vocab = {"<PAD>": 0}  # Start with <PAD> for padding
for sentence in texts:
    for word in tokenizer(sentence):
        if word not in vocab:
            vocab[word] = len(vocab)

In [21]:
# Custom dataset class for PyTorch
class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.data = [(torch.tensor(label, dtype=torch.float), encode(text)) for text, label in zip(texts, labels)]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Custom collate function to pad variable-length sequences
def collate_batch(batch):
    label_list, text_list = [], []
    for label, text in batch:
        label_list.append(label)
        text_list.append(text)
    return pad_sequence(text_list, batch_first=True), torch.tensor(label_list)

# DataLoader to create batches from dataset
dataset = TextDataset(texts, labels)
loader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_batch)


In [22]:
# Simple RNN model with embedding, RNN layer, and a fully connected output layer
class SimpleRNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.rnn = nn.RNN(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        _, h_n = self.rnn(x)
        out = self.fc(h_n.squeeze(0))
        return self.sigmoid(out).squeeze()


In [2]:
pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2


Collecting sympy (from torch==2.0.1)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.1)
  Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)
Using cached sympy-1.14.0-py3-none-any.whl (6.3 MB)
[0mInstalling collected packages: sympy, nvidia-cublas-cu11
[0mSuccessfully installed nvidia-cublas-cu11 sympy


In [3]:
import torch
print(torch.__version__)


2.0.1+cu117


In [23]:
# Initialize model, loss function, and optimizer
model = SimpleRNN(len(vocab), embed_dim=10, hidden_dim=16)
loss_fn = nn.BCELoss()  # Binary cross entropy loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop over 5 epochs
model.train()
for epoch in range(5):
    for x_batch, y_batch in loader:
        optimizer.zero_grad()              # Reset gradients
        preds = model(x_batch)             # Forward pass
        loss = loss_fn(preds, y_batch)     # Compute loss
        loss.backward()                    # Backpropagation
        optimizer.step()                    # Update weights
    print(f"Epoch {epoch+1} Loss: {loss.item():.4f}")   # Print loss for each epoch


Epoch 1 Loss: 0.7407
Epoch 2 Loss: 0.5476
Epoch 3 Loss: 0.5783
Epoch 4 Loss: 0.4377
Epoch 5 Loss: 0.3722


The steady decline in loss suggests that the RNN is successfully learning to distinguish between positive and negative sentiment, even with a very small dataset. While the fluctuations (like in epoch 3) are normal due to small batch sizes, the overall trend indicates that the model is training effectively.

You can expand this notebook by adding full training loops, evaluation metrics, and visualizations of training/validation performance.