In [8]:
import numpy as np

# Basic RNN Forward Pass Implementation
def rnn_scratch_demo(input_size=10, hidden_size=16):
    """Run one forward step of a basic RNN cell in NumPy and print the result's shape.

    Computes ``h_t = tanh(Wxh @ x_t + Whh @ h_prev + bh)`` for a single randomly
    generated input column, starting from an all-zero previous hidden state.

    Args:
        input_size: Number of rows in the input column vector ``x_t``.
        hidden_size: Number of rows in the hidden-state column vector.

    Returns:
        None. The function only prints a short report, including the shape of
        the computed hidden state, ``(hidden_size, 1)``.
    """
    # Small random weights (scaled by 0.01) and a zero bias.
    Wxh = np.random.randn(hidden_size, input_size) * 0.01   # input  -> hidden
    Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden -> hidden
    bh = np.zeros((hidden_size, 1))

    # Initial hidden state is all zeros; the input is a Gaussian stand-in
    # (it is random noise, not an actual one-hot vector).
    h_prev = np.zeros((hidden_size, 1))
    x_t = np.random.randn(input_size, 1)

    # Hidden state calculation: h_t = tanh(Wxh * x_t + Whh * h_{t-1} + b)
    h_t = np.tanh(Wxh @ x_t + Whh @ h_prev + bh)

    print("--- Part 1: RNN From Scratch ---")
    print(f"Computed Hidden State Shape: {h_t.shape}")
    print("NumPy Implementation logic complete.\n")

rnn_scratch_demo()

--- Part 1: RNN From Scratch ---
Computed Hidden State Shape: (16, 1)
NumPy Implementation logic complete.



In [9]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# 1. Load Data
import os

# The CSV location can be overridden with the POEMS_CSV environment variable so the
# notebook can run on another machine; the default is the original local path.
POEMS_CSV = os.environ.get('POEMS_CSV', '/Users/jahanavisingh/Downloads/poems-100.csv')
df = pd.read_csv(POEMS_CSV)

# Extract 'text' column and drop any empty rows
poems_list = df['text'].dropna().astype(str).tolist()

# 2. Tokenization & Vocab Building
# Lowercase and whitespace-split every poem exactly once. The same token lists feed
# both the vocabulary and the training pairs, so every token used below has an index.
tokenized_poems = [poem.lower().split() for poem in poems_list]
all_words = [token for tokens in tokenized_poems for token in tokens]
if not all_words:
    raise ValueError(f"No words found in the 'text' column of {POEMS_CSV!r}")

vocab = sorted(set(all_words))
word_to_idx = {word: i for i, word in enumerate(vocab)}
idx_to_word = dict(enumerate(vocab))
vocab_size = len(vocab)

# 3. Create Sequences (X = word, Y = next word)
# Pairs are built per poem, so a pair never spans two different poems.
# Poems with fewer than two tokens contribute no pairs.
input_seq = []
target_seq = []

for tokens in tokenized_poems:
    input_seq.extend(word_to_idx[token] for token in tokens[:-1])
    target_seq.extend(word_to_idx[token] for token in tokens[1:])

inputs = torch.LongTensor(input_seq)
targets = torch.LongTensor(target_seq)

print("--- Data Preprocessing ---")
print(f"Total Unique Words (Vocab): {vocab_size}")
print(f"Total Training Pairs: {len(inputs)}")

--- Data Preprocessing ---
Total Unique Words (Vocab): 6989
Total Training Pairs: 24634


In [10]:
class OneHotRNN(nn.Module):
    """Next-word scorer: one-hot encoded word indices -> ``nn.RNN`` -> linear layer.

    Args:
        vocab_size: Number of distinct words; used as the one-hot width, the RNN
            input size and the number of output scores.
        hidden_size: Size of the RNN hidden state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.vocab_size = vocab_size
        self.rnn = nn.RNN(vocab_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Score the next word for each input position.

        Args:
            x: Integer tensor of word indices, either shape ``(batch,)`` (one word
                per sample, treated as a length-1 sequence) or ``(batch, seq_len)``.

        Returns:
            Unnormalised scores of shape ``(batch, vocab_size)`` for 1-D input, or
            ``(batch, seq_len, vocab_size)`` for 2-D input.

        Raises:
            ValueError: If ``x`` is neither 1-D nor 2-D.
        """
        single_step = x.dim() == 1
        if single_step:
            x = x.unsqueeze(1)  # Add sequence dimension: (batch,) -> (batch, 1)
        elif x.dim() != 2:
            raise ValueError(
                f"expected indices of shape (batch,) or (batch, seq_len), got {tuple(x.shape)}"
            )

        # Convert indices to one-hot on the fly: (batch, seq_len, vocab_size)
        x_one_hot = nn.functional.one_hot(x, num_classes=self.vocab_size).float()
        out, _ = self.rnn(x_one_hot)
        if single_step:
            # Drop the length-1 sequence axis before the linear layer so that
            # single-word callers keep receiving (batch, vocab_size).
            out = out.squeeze(1)
        return self.fc(out)

# Training One-Hot Model
# Seed before the model is built so that weight initialisation, and with it the
# loss curve printed below, is repeatable each time this cell is run.
torch.manual_seed(42)

NUM_EPOCHS = 100  # number of full-batch gradient steps
LOG_EVERY = 20    # print the loss every this many epochs

model_one_hot = OneHotRNN(vocab_size, 64)
criterion = nn.CrossEntropyLoss()  # also reused by the embedding-model training loop
optimizer = optim.Adam(model_one_hot.parameters(), lr=0.005)

print("\n--- Training Part 2: One-Hot RNN ---")
for epoch in range(NUM_EPOCHS):
    # One full-batch step: every (word, next word) pair is processed at once.
    optimizer.zero_grad()
    outputs = model_one_hot(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % LOG_EVERY == 0:
        print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}")


--- Training Part 2: One-Hot RNN ---
Epoch [20/100], Loss: 7.1405
Epoch [40/100], Loss: 6.2363
Epoch [60/100], Loss: 5.4967
Epoch [80/100], Loss: 4.4459
Epoch [100/100], Loss: 3.5648


In [11]:
class EmbeddingRNN(nn.Module):
    """Next-word scorer: learned word embeddings -> ``nn.RNN`` -> linear layer.

    Args:
        vocab_size: Number of distinct words; sizes the embedding table and the
            output layer.
        embed_dim: Width of each word embedding (the RNN input size).
        hidden_size: Size of the RNN hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.RNN(embed_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Score the next word for each input position.

        Args:
            x: Integer tensor of word indices, either shape ``(batch,)`` (one word
                per sample, treated as a length-1 sequence) or ``(batch, seq_len)``.

        Returns:
            Unnormalised scores of shape ``(batch, vocab_size)`` for 1-D input, or
            ``(batch, seq_len, vocab_size)`` for 2-D input.

        Raises:
            ValueError: If ``x`` is neither 1-D nor 2-D.
        """
        single_step = x.dim() == 1
        if single_step:
            x = x.unsqueeze(1)  # Add sequence dimension: (batch,) -> (batch, 1)
        elif x.dim() != 2:
            raise ValueError(
                f"expected indices of shape (batch,) or (batch, seq_len), got {tuple(x.shape)}"
            )

        x_embed = self.embedding(x)  # (batch, seq_len, embed_dim)
        out, _ = self.rnn(x_embed)
        if single_step:
            # Drop the length-1 sequence axis before the linear layer so that
            # single-word callers keep receiving (batch, vocab_size).
            out = out.squeeze(1)
        return self.fc(out)

# Training Embedding Model
# Seed before the model is built so that weight initialisation, and with it the
# loss curve printed below, is repeatable each time this cell is run.
torch.manual_seed(42)

NUM_EPOCHS = 100  # number of full-batch gradient steps
LOG_EVERY = 20    # print the loss every this many epochs

embed_dim = 50
model_embed = EmbeddingRNN(vocab_size, embed_dim, 64)
optimizer_embed = optim.Adam(model_embed.parameters(), lr=0.005)

print("\n--- Training Part 3: Embedding RNN ---")
for epoch in range(NUM_EPOCHS):
    # One full-batch step; `criterion` is the loss object created for the
    # one-hot model's training loop.
    optimizer_embed.zero_grad()
    outputs = model_embed(inputs)
    loss_e = criterion(outputs, targets)
    loss_e.backward()
    optimizer_embed.step()
    if (epoch + 1) % LOG_EVERY == 0:
        print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss_e.item():.4f}")


--- Training Part 3: Embedding RNN ---
Epoch [20/100], Loss: 6.9105
Epoch [40/100], Loss: 5.1824
Epoch [60/100], Loss: 4.2695
Epoch [80/100], Loss: 3.7832
Epoch [100/100], Loss: 3.4792


In [12]:
def generate_text(model, start_word, length=8):
    """Greedily generate ``length`` words following ``start_word``.

    At each step only the current word's index is passed to ``model`` and the
    highest-scoring vocabulary entry becomes the next word. Nothing else is
    carried from one step to the next, so the output is fully determined by the
    start word. Uses the module-level ``word_to_idx`` / ``idx_to_word`` lookups.

    Args:
        model: Callable module mapping a ``LongTensor`` of shape ``(1,)`` to
            scores of shape ``(1, vocab_size)``. It is switched to eval mode and
            left in that mode.
        start_word: First word of the output; lowercased before lookup.
        length: Number of words to generate after the start word.

    Returns:
        The start word and generated words joined by single spaces, or the
        literal string ``"Word not in vocab"`` if the start word is unknown.
    """
    model.eval()
    word = start_word.lower()
    if word not in word_to_idx:
        return "Word not in vocab"

    result = [word]
    # Inference only: disable autograd so no computation graph is built per step.
    with torch.no_grad():
        for _ in range(length):
            inp = torch.LongTensor([word_to_idx[word]])
            out = model(inp)
            _, next_idx = torch.max(out, dim=1)
            word = idx_to_word[next_idx.item()]
            result.append(word)
    return " ".join(result)

# Use the very first word of the corpus as the generation seed.
seed = all_words[0]

# Report the last recorded training loss of each model.
print("\n--- Final Analysis & Comparison ---")
print(f"Final One-Hot Loss: {loss.item():.4f}")
print(f"Final Embedding Loss: {loss_e.item():.4f}")
print("-" * 30)
print(f"Seed word: '{seed}'")

# Generate from both models with the same seed word, one-hot model first.
for label, trained_model in (
    ("Generated (One-Hot):", model_one_hot),
    ("Generated (Embedding):", model_embed),
):
    print(label, generate_text(trained_model, seed))


--- Final Analysis & Comparison ---
Final One-Hot Loss: 3.5648
Final Embedding Loss: 3.4792
------------------------------
Seed word: 'o'
Generated (One-Hot): o lord, to the young men and the young
Generated (Embedding): o lord, i am to the young men and
