In [2]:
import torch 
import torch.nn as nn
import math
import torch.optim as optim

In [3]:
# Toy supervised dataset: a list of (question, answer) string pairs, in the
# order written below.
DATA = list(
    {
        "capital of usa": "washington",
        "capital of india": "delhi",
        "capital of france": "paris",
    }.items()
)

# Tokenization and Vocabulary

In [4]:
def tokenize(text):
    """Lower-case ``text`` and split it on runs of whitespace.

    Returns the resulting list of tokens; an empty or all-whitespace string
    yields an empty list.
    """
    lowered = text.lower()
    return lowered.split()

In [5]:
# -----------------------
# 2) Build Vocabulary
# -----------------------
# Gather every token from both the question and the answer of each pair,
# question tokens first, keeping duplicates.
all_tokens = []
for q, a in DATA:
    for text in (q, a):
        all_tokens.extend(tokenize(text))
print(all_tokens)


['capital', 'of', 'usa', 'washington', 'capital', 'of', 'india', 'delhi', 'capital', 'of', 'france', 'paris']


In [6]:
# Drop duplicate tokens, then sort so the vocabulary order does not depend on
# set iteration order.
vocab = list(set(all_tokens))
vocab.sort()
print("Sort the tokens === ", vocab)

Sort the tokens ===  ['capital', 'delhi', 'france', 'india', 'of', 'paris', 'usa', 'washington']


In [7]:
# Forward lookup: token string -> integer id (its position in `vocab`).
stoi = dict(zip(vocab, range(len(vocab))))
print("String to Index == ", stoi)
# Reverse lookup: integer id -> token string, derived by inverting `stoi`.
itos = {index: word for word, index in stoi.items()}
print("Index to String ==", itos)

String to Index ==  {'capital': 0, 'delhi': 1, 'france': 2, 'india': 3, 'of': 4, 'paris': 5, 'usa': 6, 'washington': 7}
Index to String == {0: 'capital', 1: 'delhi', 2: 'france', 3: 'india', 4: 'of', 5: 'paris', 6: 'usa', 7: 'washington'}


In [8]:
def encode(tokens):
    """Translate a sequence of tokens into their integer ids via ``stoi``.

    Returns a list with one id per token, in input order. A token that is not
    a key of ``stoi`` raises ``KeyError``.
    """
    ids = []
    for token in tokens:
        ids.append(stoi[token])
    return ids

# Build an RNN from Scratch (no batching)

In [9]:
class SimpleRNN(nn.Module):
    """Minimal tanh RNN classifier built from raw ``nn.Parameter`` tensors.

    The model handles one unbatched sequence at a time: every token id is
    looked up in the embedding matrix ``E``, folded into the hidden state with

        h_t = tanh(x_t @ Wxh + h_{t-1} @ Whh + bh)

    and the final hidden state is projected to one logit per vocabulary entry.

    Args:
        vocab_size: Number of rows of the embedding matrix and number of
            output logits.
        embedding_dim: Size of each token embedding vector.
        hidden_size: Size of the recurrent hidden state.
    """

    def __init__(self, vocab_size, embedding_dim=4, hidden_size=4):
        super().__init__()
        self.hidden_size = hidden_size

        # Embedding matrix (V, E)
        self.E = nn.Parameter(torch.empty(vocab_size, embedding_dim))

        # Recurrent weights: input->hidden (E, H), hidden->hidden (H, H), bias (H,)
        self.Wxh = nn.Parameter(torch.empty(embedding_dim, hidden_size))
        self.Whh = nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.bh = nn.Parameter(torch.zeros(hidden_size))

        # Output layer: hidden->vocab (H, V), bias (V,)
        self.Why = nn.Parameter(torch.empty(hidden_size, vocab_size))
        self.by = nn.Parameter(torch.zeros(vocab_size))

        self.reset_parameters()

    def reset_parameters(self):
        """(Re)initialise every parameter in place.

        Weight matrices are drawn from U(-b, b) with b = 1 / sqrt(hidden_size).
        Biases are set back to zero so that calling this on an already trained
        model restores a completely fresh state, not just fresh weights.
        """
        bound = 1.0 / math.sqrt(self.hidden_size)
        # Same draw order as the attribute declarations, so a given seed
        # always produces the same initial weights.
        for weight in (self.E, self.Wxh, self.Whh, self.Why):
            nn.init.uniform_(weight, -bound, bound)
        for bias in (self.bh, self.by):
            nn.init.zeros_(bias)

    def forward(self, token_ids):
        """Run the recurrence over one sequence and return the output logits.

        Args:
            token_ids: 1-D sequence of integer token ids, shape (T,).

        Returns:
            Tensor of shape (vocab_size,) holding unnormalised scores computed
            from the final hidden state. For an empty sequence the hidden
            state stays at zero, so the result equals the output bias ``by``.
        """
        # Build h0 from a parameter so it follows the model's dtype and device
        # (a plain torch.zeros would break after .double() or .to(device)).
        h = self.Whh.new_zeros(self.hidden_size)

        for token_id in token_ids:
            x_t = self.E[token_id]
            h = torch.tanh(x_t @ self.Wxh + h @ self.Whh + self.bh)

        logits = h @ self.Why + self.by
        return logits

# Training (no batching, one example at a time)

In [10]:
# Seed the global RNG before the model is built so that weight initialisation,
# and therefore the printed loss/accuracy, is reproducible on a fresh kernel.
torch.manual_seed(0)

model = SimpleRNN(len(vocab))
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
epochs = 100
log_every = 50  # print metrics once every `log_every` epochs

# Encode the dataset once up front; tokenizing and tensor-building do not
# change between epochs. Only the first token of each answer is used as the
# class label.
train_examples = [
    (
        torch.tensor(encode(tokenize(q)), dtype=torch.long),
        torch.tensor([stoi[tokenize(a)[0]]], dtype=torch.long),
    )
    for q, a in DATA
]

for epoch in range(epochs):
    total_loss = 0.0  # summed (not averaged) loss over the epoch
    correct = 0

    # One optimizer step per example (no batching).
    for q_ids, y_id in train_examples:
        logits = model(q_ids)
        # CrossEntropyLoss is given (N, C) logits and (N,) targets; add N=1.
        loss = loss_fn(logits.unsqueeze(0), y_id)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Accuracy uses the logits computed before this step's weight update.
        pred = logits.argmax().item()
        if pred == y_id.item():
            correct += 1

    if (epoch+1) % log_every == 0:
        print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Acc: {correct/len(DATA):.2f}")



Epoch 50 | Loss: 0.3788 | Acc: 1.00
Epoch 100 | Loss: 0.0928 | Acc: 1.00


In [11]:
# -----------------------
# 5) Test
# -----------------------
test_q = "capital of france"
test_ids = torch.tensor(encode(tokenize(test_q)), dtype=torch.long)

# Inference only, so run the forward pass without autograd tracking.
with torch.no_grad():
    logits = model(test_ids)

# The predicted token is the vocabulary entry with the largest logit.
pred_id = int(torch.argmax(logits))
print("Predicted:", itos[pred_id])

Predicted: paris
