In [1]:
# ==========================================
# Toy RNN Examples for NLP Tasks in PyTorch
# ==========================================

# This notebook includes 3 simple examples:
# 1. Language Modeling (Next word prediction)
# 2. POS Tagging (Sequence labeling)
# 3. Text Classification (Sentence sentiment)

# Each example uses a custom RNN-based model
# with clear comments, clean OOP structure, and basic inference.

In [2]:
# =============================
# Common Setup & Imports
# =============================

import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
# Toy vocabulary shared by all three examples, plus lookup tables in both
# directions (word -> index and index -> word).
vocab = [
    "so", "long", "and", "thanks",
    "janet", "will", "back", "the", "bill",
    "great", "bad",
]
word_to_ix = dict(zip(vocab, range(len(vocab))))
ix_to_word = dict(enumerate(vocab))
vocab_size = len(vocab)

In [4]:
# ==================================
# 1. LANGUAGE MODELING (SO ➝ LONG)
# ==================================

class LanguageModelRNN(nn.Module):
    """Next-word predictor: embedding -> single-layer RNN -> linear layer over the vocabulary.

    Args:
        vocab_size: Number of distinct tokens; sizes both the embedding table
            and the output layer.
        embedding_dim: Width of each token embedding.
        hidden_dim: Width of the RNN hidden state.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.output = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        """Return next-token logits for every position of ``x``.

        Args:
            x: Integer tensor of token indices, shape (batch, seq_len).

        Returns:
            Raw (un-normalised) logits. Shape is (seq_len, vocab_size) when
            batch == 1, because the leading batch axis is squeezed away;
            otherwise (batch, seq_len, vocab_size).
        """
        embeds = self.embedding(x)
        # Build the initial hidden state from `embeds` so it always lives on the
        # same device and has the same dtype as the model's activations
        # (a plain torch.zeros would be CPU/float32 regardless of the module).
        h0 = embeds.new_zeros(1, x.size(0), self.rnn.hidden_size)
        rnn_out, _ = self.rnn(embeds, h0)
        # squeeze(0) only drops the batch axis when batch == 1.
        logits = self.output(rnn_out.squeeze(0))
        return logits

# Toy training pair: the target at each position is the word that follows it.
sequence = ["so", "long", "and"]
targets = ["long", "and", "thanks"]
input_idxs = torch.tensor([[word_to_ix[w] for w in sequence]])  # (1, seq_len)
target_idxs = torch.tensor([word_to_ix[w] for w in targets])    # (seq_len,)

lm_model = LanguageModelRNN(vocab_size, embedding_dim=8, hidden_dim=10)
# CrossEntropyLoss consumes raw logits, so the model output is passed as-is.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(lm_model.parameters(), lr=0.1)

for epoch in range(100):
    optimizer.zero_grad()
    logits = lm_model(input_idxs)
    loss = loss_fn(logits, target_idxs)
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0:
        print(f"[LM] Epoch {epoch} | Loss: {loss.item():.4f}")

# Inference for language modeling
with torch.no_grad():
    test_seq = torch.tensor([[word_to_ix["so"]]])
    logits = lm_model(test_seq)
    # Take the logits of the last position. Softmax is monotonic, so the argmax
    # of the raw logits is already the most probable next word.
    predicted_idx = torch.argmax(logits[-1]).item()
    print(f"[LM] Prediction after 'so': {ix_to_word[predicted_idx]}")

[LM] Epoch 0 | Loss: 2.3399
[LM] Epoch 20 | Loss: 0.4946
[LM] Epoch 40 | Loss: 0.1509
[LM] Epoch 60 | Loss: 0.0809
[LM] Epoch 80 | Loss: 0.0540
[LM] Prediction after 'so': long


In [5]:
# ======================================
# 2. POS TAGGING (JANET WILL BACK THE...)
# ======================================

# Tag list for the example sentence; it is also used as the tag inventory.
tags = ["NNP", "MD", "VB", "DT", "NN"]
# dict.fromkeys de-duplicates while keeping first-seen order, so every tag gets
# the same index on every run. Iterating a set() of strings instead would give
# an order that changes between interpreter sessions (hash randomisation).
tag_to_ix = {tag: i for i, tag in enumerate(dict.fromkeys(tags))}
ix_to_tag = {i: tag for tag, i in tag_to_ix.items()}
tagset_size = len(tag_to_ix)

class POSRNN(nn.Module):
    """Sequence tagger: embedding -> single-layer RNN -> linear layer over the tag set.

    Args:
        vocab_size: Number of distinct tokens in the embedding table.
        embedding_dim: Width of each token embedding.
        hidden_dim: Width of the RNN hidden state.
        tagset_size: Number of output tags (size of the final linear layer).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, tagset_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, tagset_size)

    def forward(self, x):
        """Return per-token tag logits for ``x``.

        Args:
            x: Integer tensor of token indices, shape (batch, seq_len).

        Returns:
            Raw (un-normalised) logits. Shape is (seq_len, tagset_size) when
            batch == 1, because the leading batch axis is squeezed away;
            otherwise (batch, seq_len, tagset_size).
        """
        embeds = self.embedding(x)
        # Derive h0 from `embeds` so it matches the model's device and dtype;
        # torch.zeros(...) would always create a CPU/float32 tensor.
        h0 = embeds.new_zeros(1, x.size(0), self.rnn.hidden_size)
        rnn_out, _ = self.rnn(embeds, h0)
        # squeeze(0) only drops the batch axis when batch == 1.
        logits = self.fc(rnn_out.squeeze(0))
        return logits

# One training sentence; `tags` supplies the target tag for each word in order.
sentence = ["janet", "will", "back", "the", "bill"]
input_idxs = torch.tensor([[word_to_ix[w] for w in sentence]])  # (1, seq_len)
target_idxs = torch.tensor([tag_to_ix[t] for t in tags])        # (seq_len,)

pos_model = POSRNN(vocab_size, embedding_dim=8, hidden_dim=10, tagset_size=tagset_size)
# CrossEntropyLoss consumes raw logits, so the model output is passed as-is.
pos_loss_fn = nn.CrossEntropyLoss()
pos_optimizer = torch.optim.Adam(pos_model.parameters(), lr=0.05)

for epoch in range(100):
    pos_optimizer.zero_grad()
    logits = pos_model(input_idxs)
    loss = pos_loss_fn(logits, target_idxs)
    loss.backward()
    pos_optimizer.step()
    if epoch % 20 == 0:
        print(f"[POS] Epoch {epoch} | Loss: {loss.item():.4f}")

# POS Inference
with torch.no_grad():
    pred_logits = pos_model(input_idxs)
    # Softmax is monotonic, so the argmax over raw logits already picks the
    # most probable tag for each token.
    predicted_tags = torch.argmax(pred_logits, dim=1)
    print("[POS] Predictions:")
    for word, tag_idx in zip(sentence, predicted_tags):
        print(f"  {word:>6} → {ix_to_tag[tag_idx.item()]}")

[POS] Epoch 0 | Loss: 1.5741
[POS] Epoch 20 | Loss: 0.0036
[POS] Epoch 40 | Loss: 0.0009
[POS] Epoch 60 | Loss: 0.0006
[POS] Epoch 80 | Loss: 0.0006
[POS] Predictions:
   janet → NNP
    will → MD
    back → VB
     the → DT
    bill → NN


In [6]:
# =========================================
# 3. TEXT CLASSIFICATION (Positive/Negative)
# =========================================

# Sentiment classes and their index lookups in both directions.
class_labels = ["pos", "neg"]
label_to_ix = dict(zip(class_labels, range(len(class_labels))))
ix_to_label = dict(enumerate(class_labels))

class ClassifierRNN(nn.Module):
    """Sentence classifier: embedding -> single-layer RNN -> linear layer on the final hidden state.

    Args:
        vocab_size: Number of distinct tokens in the embedding table.
        embedding_dim: Width of each token embedding.
        hidden_dim: Width of the RNN hidden state.
        num_classes: Number of output classes (size of the final linear layer).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return one vector of class logits per input sequence.

        Args:
            x: Integer tensor of token indices, shape (batch, seq_len).

        Returns:
            Raw (un-normalised) logits of shape (batch, num_classes).
        """
        embeds = self.embedding(x)
        # Derive h0 from `embeds` so it matches the model's device and dtype;
        # torch.zeros(...) would always create a CPU/float32 tensor.
        h0 = embeds.new_zeros(1, x.size(0), self.rnn.hidden_size)
        # Only the final hidden state is needed; per-step outputs are discarded.
        _, hn = self.rnn(embeds, h0)
        final_hidden = hn[-1]  # (batch, hidden_dim)
        return self.fc(final_hidden)

# Toy examples with some positive/negative words.
# Both sentences have the same length, so they stack into one (batch, seq_len) tensor.
train_sentences = [["so", "great"], ["bad", "bill"]]
train_labels = ["pos", "neg"]
train_inputs = torch.tensor([[word_to_ix[w] for w in s] for s in train_sentences])
train_targets = torch.tensor([label_to_ix[l] for l in train_labels])

clf_model = ClassifierRNN(vocab_size, embedding_dim=8, hidden_dim=12, num_classes=2)
# CrossEntropyLoss consumes raw logits, so the model output is passed as-is.
clf_loss_fn = nn.CrossEntropyLoss()
clf_optimizer = torch.optim.Adam(clf_model.parameters(), lr=0.01)

for epoch in range(100):
    clf_optimizer.zero_grad()
    logits = clf_model(train_inputs)
    loss = clf_loss_fn(logits, train_targets)
    loss.backward()
    clf_optimizer.step()
    if epoch % 20 == 0:
        print(f"[CLF] Epoch {epoch} | Loss: {loss.item():.4f}")

# Classification Inference
with torch.no_grad():
    test_sent = torch.tensor([[word_to_ix["so"], word_to_ix["bad"]]])
    prediction = clf_model(test_sent)
    # Softmax is monotonic, so the argmax over raw logits already picks the
    # most probable class.
    predicted_label = torch.argmax(prediction, dim=1).item()
    print(f"[CLF] Prediction for ['so', 'bad']: {ix_to_label[predicted_label]}")

[CLF] Epoch 0 | Loss: 0.8039
[CLF] Epoch 20 | Loss: 0.0107
[CLF] Epoch 40 | Loss: 0.0024
[CLF] Epoch 60 | Loss: 0.0015
[CLF] Epoch 80 | Loss: 0.0012
[CLF] Prediction for ['so', 'bad']: pos
