In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
from collections import Counter
import numpy as np

In [2]:
# 1. Load dataset and build vocab
from datasets import load_dataset
dataset = load_dataset("imdb")

train_texts = dataset['train']['text']
train_labels = dataset['train']['label']

test_texts = dataset['test']['text']
test_labels = dataset['test']['label']

def build_vocab(texts, max_size=100000):
    words = []
    for text in texts:
        words.extend(text.lower().split())
    freq = Counter(words)
    vocab = {word: i+1 for i, (word, _) in enumerate(freq.most_common(max_size))}
    return vocab

vocab = build_vocab(train_texts)

In [3]:
# 2. Encode texts to fixed-length sequences
def encode(text, vocab, max_len=100):
    tokens = text.lower().split()
    idxs = [vocab.get(token, 0) for token in tokens]  # 0 for unknown words
    if len(idxs) < max_len:
        idxs += [0] * (max_len - len(idxs))
    else:
        idxs = idxs[:max_len]
    return idxs

In [4]:
# 3. Custom dataset class
class TextDataset(Dataset):
    def __init__(self, texts, labels, vocab):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        x = torch.tensor(encode(self.texts[idx], self.vocab), dtype=torch.long)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y

train_dataset = TextDataset(train_texts, train_labels, vocab)
test_dataset = TextDataset(test_texts, test_labels, vocab)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

In [5]:
# 4. Define model
import torch
import torch.nn as nn

class RNNClassifier(nn.Module):
    def __init__(
        self,
        vocab_size,
        embed_dim=50,
        hidden_dim=64,
        output_dim=2,
        num_layers=2,
        dropout=0.3,
        bidirectional=True,
        padding_idx=0
    ):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size + 1, embed_dim, padding_idx=padding_idx)
        self.rnn = nn.RNN(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
            batch_first=True
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), output_dim)

        self._init_weights()

    def _init_weights(self):
        for name, param in self.rnn.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.zeros_(self.fc.bias)

    def forward(self, x):
        x = self.embedding(x)
        out, _ = self.rnn(x)  # (batch, seq_len, hidden_dim * num_directions)
        out = out.mean(dim=1)  # (batch, hidden_dim * num_directions)
        out = self.dropout(out)
        return self.fc(out)

model = RNNClassifier(
    vocab_size=len(vocab),
    embed_dim=128,
    hidden_dim=64,
    output_dim=2,
    num_layers=10,
    dropout=0.3,
    bidirectional=True,
    padding_idx=0
)

In [6]:
# Sanity Check

sample_x, sample_y = next(iter(train_loader))
sample_out = model(sample_x.to("cpu"))
print("Output shape:", sample_out.shape)
print("Label shape:", sample_y.shape)
print("Sample labels:", sample_y[:10])

# Double Checking for my own Sanity

for x_batch, y_batch in train_loader:
    y_batch = y_batch.to("cpu")
    assert y_batch.min() >= 0 and y_batch.max() < 2, f"Label values out of range: min={y_batch.min()}, max={y_batch.max()}"
    break


Output shape: torch.Size([64, 2])
Label shape: torch.Size([64])
Sample labels: tensor([1, 0, 1, 1, 1, 1, 1, 0, 0, 1])


In [7]:
# 5. Loss and optimizer and scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

In [8]:
# 6. Training loop
def train_epoch(model, loader, optimizer, criterion, device, max_grad_norm=5.0):
    model.train()
    total_loss = 0
    for x_batch, y_batch in loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()

        # ✅ Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

In [10]:
# 7. Evaluation function
def evaluate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for x_batch, y_batch in loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            total_loss += loss.item()
            
            preds = outputs.argmax(dim=1)
            correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)
    return total_loss / len(loader), correct / total

In [11]:
# Training 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
early_stopper = EarlyStopper(patience=5)

epochs = 50
for epoch in range(epochs):
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = evaluate(model, test_loader, criterion, device)
    scheduler.step(val_loss)

    print(f"Epoch {epoch+1}: Train loss={train_loss:.4f}, Val loss={val_loss:.4f}, Val acc={val_acc:.4f}")

Epoch 1: Train loss=0.6124, Val loss=0.5140, Val acc=0.7519
Epoch 2: Train loss=0.3995, Val loss=0.5153, Val acc=0.7626
Epoch 3: Train loss=0.2882, Val loss=0.6004, Val acc=0.7654
Epoch 4: Train loss=0.2312, Val loss=0.6514, Val acc=0.7626
Epoch 5: Train loss=0.1916, Val loss=0.6159, Val acc=0.7587
Epoch 6: Train loss=0.1411, Val loss=0.6834, Val acc=0.7577
Epoch 7: Train loss=0.1078, Val loss=0.7233, Val acc=0.7704
Epoch 8: Train loss=0.0761, Val loss=0.8785, Val acc=0.7636
Epoch 9: Train loss=0.0557, Val loss=1.0701, Val acc=0.7680
Epoch 10: Train loss=0.0388, Val loss=1.1157, Val acc=0.7702
Epoch 11: Train loss=0.0276, Val loss=1.2663, Val acc=0.7694
Epoch 12: Train loss=0.0206, Val loss=1.3909, Val acc=0.7688
Epoch 13: Train loss=0.0155, Val loss=1.6937, Val acc=0.7634
Epoch 14: Train loss=0.0109, Val loss=1.6909, Val acc=0.7646
Epoch 15: Train loss=0.0087, Val loss=1.7479, Val acc=0.7638
Epoch 16: Train loss=0.0064, Val loss=1.8795, Val acc=0.7638
Epoch 17: Train loss=0.0055, Val 

In [12]:
test_acc = evaluate(model, test_loader, criterion, device)
print(f"Test Accuracy: {round(test_acc[1] * 100, 2)}%")

Test Accuracy: 76.42%
