In [1]:
# Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

In [2]:
# Download Tiny Shakespeare into ./input.txt (wget's -O writes to that exact path).
# To train on your own plain text file instead, skip this cell and place the file at input.txt.
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt -O input.txt

--2025-05-22 15:34:20--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt’


2025-05-22 15:34:20 (12.9 MB/s) - ‘input.txt’ saved [1115394/1115394]



In [3]:
# Vocab
# Read the whole corpus into memory. The encoding is pinned to UTF-8 so the
# decoded text (and therefore the vocabulary) does not depend on the
# platform's locale default.
with open("input.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Character-level vocabulary: every distinct character in the corpus, sorted
# so that index assignment is stable from run to run.
vocab = sorted(set(text))
vocab_size = len(vocab)
print(f"Unique chars: {vocab_size}")

Unique chars: 65


In [4]:
# Bidirectional lookup tables between characters and integer ids; a character's
# id is its position in the sorted `vocab` list.
idx_to_char = dict(enumerate(vocab))
char_to_idx = {symbol: index for index, symbol in idx_to_char.items()}

# The full corpus as a 1-D int64 tensor of character ids.
encoded_text = torch.tensor([char_to_idx[symbol] for symbol in text], dtype=torch.long)

In [5]:
def get_batch(seq_len=64, batch_size=1):
    """Draw random training windows from the module-level `encoded_text`.

    Args:
        seq_len: number of characters in each window.
        batch_size: number of windows to draw.

    Returns:
        (x, y): two stacked tensors with one row per window, where row i of
        `y` is row i of `x` shifted one position to the right in the corpus
        (the next-character targets).
    """
    # Largest start index that still leaves room for the shifted target window.
    last_start = len(encoded_text) - seq_len - 1

    inputs, targets = [], []
    for _ in range(batch_size):
        start = random.randint(0, last_start)  # inclusive on both ends
        stop = start + seq_len
        inputs.append(encoded_text[start:stop])
        targets.append(encoded_text[start + 1:stop + 1])

    return torch.stack(inputs), torch.stack(targets)

In [6]:
# Manual RNN Cell

class MyRNNCell(nn.Module):
    """One step of a tanh RNN written with explicit parameters.

    Computes ``tanh(x @ W_ih.T + b_ih + h_prev @ W_hh.T + b_hh)``.

    Parameters (registered in this order):
        W_ih: (hidden_size, input_size) input-to-hidden weights.
        W_hh: (hidden_size, hidden_size) hidden-to-hidden weights.
        b_ih, b_hh: (hidden_size,) biases, initialised to zero.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        init_scale = 0.01  # small Gaussian init for both weight matrices

        # W_ih is drawn before W_hh; the order determines the values obtained
        # under a fixed random seed.
        weight_ih = torch.randn(hidden_size, input_size) * init_scale
        weight_hh = torch.randn(hidden_size, hidden_size) * init_scale

        self.W_ih = nn.Parameter(weight_ih)
        self.W_hh = nn.Parameter(weight_hh)
        self.b_ih = nn.Parameter(torch.zeros(hidden_size))
        self.b_hh = nn.Parameter(torch.zeros(hidden_size))

    def forward(self, x, h_prev):
        """Return the next hidden state given input `x` and previous state `h_prev`."""
        input_term = torch.matmul(x, self.W_ih.T) + self.b_ih
        recurrent_term = torch.matmul(h_prev, self.W_hh.T)
        pre_activation = input_term + recurrent_term + self.b_hh
        return torch.tanh(pre_activation)

In [7]:
# RNN From Scratch

class CharRNN_Custom(nn.Module):
    """Character-level language model that unrolls `MyRNNCell` step by step.

    Pipeline per time step: token id -> embedding -> RNN cell -> linear
    projection to per-character logits.

    Args:
        vocab_size: number of distinct characters (embedding rows and output logits).
        hidden_size: width of the recurrent hidden state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Learned embedding whose width equals vocab_size. It is a trainable
        # dense table, not a fixed one-hot encoding.
        self.embed = nn.Embedding(vocab_size, vocab_size)
        self.rnn_cell = MyRNNCell(vocab_size, hidden_size)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, h=None):
        """Run the model over a batch of token-id sequences.

        Args:
            x: integer tensor of shape (batch_size, seq_len).
            h: optional initial hidden state of shape (batch_size, hidden_size);
                zeros are used when omitted.

        Returns:
            (logits, h): logits of shape (batch_size, seq_len, vocab_size) and
            the hidden state after the last step, shape (batch_size, hidden_size).
        """
        batch_size, seq_len = x.shape
        if h is None:
            # Allocate the initial state from a parameter so it inherits the
            # model's device and dtype; a bare torch.zeros() is always
            # CPU/float32 and fails once the model is moved with .to(...).
            h = self.fc.weight.new_zeros(batch_size, self.hidden_size)
        logits = []
        for t in range(seq_len):
            x_t = self.embed(x[:, t])
            h = self.rnn_cell(x_t, h)
            logits.append(self.fc(h))
        # Stack along dim=1 so the time axis follows the batch axis.
        return torch.stack(logits, dim=1), h

In [8]:
# PyTorch nn.RNN Version

class CharRNN_PyTorch(nn.Module):
    """Character-level language model built on the library `nn.RNN` layer.

    Args:
        vocab_size: number of distinct characters (embedding rows and output logits).
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, vocab_size, hidden_size, num_layers=1):
        super().__init__()
        embed_dim = vocab_size  # the embedding is as wide as the vocabulary
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.rnn = nn.RNN(
            input_size=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, x, h=None):
        """Map token ids (batch, seq) to logits (batch, seq, vocab) and the final RNN state."""
        embedded = self.embed(x)
        hidden_seq, h_last = self.rnn(embedded, h)
        return self.fc(hidden_seq), h_last

In [9]:
# Train Function (shared)

def train_model(model, steps=3000, print_every=500, lr=1e-3, seq_len=64, batch_size=1):
    """Train `model` on random windows from `get_batch` using Adam and cross-entropy.

    Args:
        model: module called as ``model(x)`` that returns ``(logits, hidden)``,
            with logits shaped (batch, seq, num_classes).
        steps: number of optimisation steps.
        print_every: log the loss every this many steps; 0 or None disables logging.
        lr: Adam learning rate.
        seq_len: window length forwarded to `get_batch`.
        batch_size: number of windows per step forwarded to `get_batch`.

    Returns:
        None. The model is updated in place and left in training mode.
    """
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    # Batches are built on the CPU; move them to wherever the model lives.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for step in range(steps):
        x, y = get_batch(seq_len=seq_len, batch_size=batch_size)
        x, y = x.to(device), y.to(device)
        logits, _ = model(x)
        # Flatten (batch, seq, classes) -> (batch*seq, classes). The class
        # count is read from the logits rather than a notebook global so the
        # function works for any model output width.
        loss = loss_fn(logits.reshape(-1, logits.size(-1)), y.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # The truthiness check avoids a ZeroDivisionError when print_every is 0.
        if print_every and step % print_every == 0:
            print(f"Step {step}, Loss: {loss.item():.4f}")

In [10]:
# Sample Function

def sample(model, start_char="T", length=200):
    """Generate text by repeatedly sampling the next character from `model`.

    Args:
        model: module called as ``model(tokens, h)`` that returns ``(logits, h)``;
            ``logits[:, -1]`` is used as the next-character scores.
        start_char: seed character; must be a key of ``char_to_idx``.
        length: number of characters to generate after the seed.

    Returns:
        A string consisting of `start_char` followed by `length` sampled characters.

    Raises:
        KeyError: if `start_char` is not in ``char_to_idx``.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()

    # Build the token tensors on the same device as the model's parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    idx = char_to_idx[start_char]
    result = [start_char]
    h = None
    # Inference only: without no_grad() the hidden state carried from step to
    # step would keep an ever-growing autograd graph alive.
    with torch.no_grad():
        for _ in range(length):
            tokens = torch.tensor([[idx]], device=device)
            logits, h = model(tokens, h)
            probs = F.softmax(logits[:, -1], dim=-1)
            idx = torch.multinomial(probs, num_samples=1).item()
            result.append(idx_to_char[idx])
    return ''.join(result)

In [11]:
# Train and Compare
# Fit the hand-written RNN using train_model's default settings.
# NOTE(review): no random seed is set in the cells visible here, so the logged
# losses will differ from run to run.
print("Training RNN from scratch...")
custom_model = CharRNN_Custom(vocab_size, hidden_size=128)
train_model(custom_model)

Training RNN from scratch...
Step 0, Loss: 4.1850
Step 500, Loss: 2.6011
Step 1000, Loss: 2.4569
Step 1500, Loss: 2.5476
Step 2000, Loss: 1.9809
Step 2500, Loss: 1.8580


In [12]:
# Generate text from the trained hand-written model, seeded with "T"
# (sample() is called with its default length).
print("\nGenerated by custom RNN:")
print(sample(custom_model, "T"))


Generated by custom RNN:
Thour ione grair lon, I'll sow!

Fimcel to rulllatinge bowd be,
Home his prouriof upalt, tout, not will all prite ot would arout bot thall mne's in bene, thou might his ofe-
Bull wirs: turch-dost ona t


In [13]:
# Fit the nn.RNN-based model with the same hidden size and the same
# train_model defaults as the hand-written model, for a like-for-like comparison.
print("\nTraining nn.RNN model...")
pytorch_model = CharRNN_PyTorch(vocab_size, hidden_size=128)
train_model(pytorch_model)


Training nn.RNN model...
Step 0, Loss: 4.1594
Step 500, Loss: 2.0742
Step 1000, Loss: 1.8567
Step 1500, Loss: 1.9986
Step 2000, Loss: 1.8591
Step 2500, Loss: 1.9798


In [14]:
# Generate text from the trained nn.RNN model, seeded with "T"
# (sample() is called with its default length).
print("\nGenerated by nn.RNN:")
print(sample(pytorch_model, "T"))


Generated by nn.RNN:
TAM:
Chid, I love,
Coich for gesielt the al'ust Ge! ast have but atbere' nigh,
Scems oulds hers all taces heence.
WhPreale.
Will kays the bich beso pousiundoun thil uf cabsay exvathour deat, bus llope 
