<a href="https://colab.research.google.com/github/Peeyusj/makeMorePart2/blob/main/week6_mlp_minibatch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import torch
import torch.nn.functional as F
import pandas as pd

# Load data
url = "https://raw.githubusercontent.com/Peeyusj/makeMoreIndia/main/indian_names.csv"
df = pd.read_csv(url)
words = df['Name'].str.lower().tolist()
words = [w for w in words if isinstance(w, str) and w.isalpha()]

print(f"Total words: {len(words)}")

Total words: 6466


In [9]:
chars = sorted(set(''.join(words)))
chars = ['.'] + chars
stoi = {ch: i for i, ch in enumerate(chars)}
itos = {i: ch for i, ch in enumerate(chars)}
print(chars)
print(len(chars))

['.', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
27


In [10]:
C=torch.randn(27,2)
W1=torch.randn(6,100)
b1=torch.randn(100)

In [11]:
block_size=3
X,Y=[], []
for w in words:
 context=[0]*block_size
 for ch in w+'.':
  ix=stoi[ch]
  X.append(context)           # append FIRST
  Y.append(ix)
  context = context[1:] + [ix] # slide LAST

X = torch.tensor(X)
Y = torch.tensor(Y)
print(X.shape)
print(Y.shape)
print(X[:5])
print(Y[:5])

torch.Size([47561, 3])
torch.Size([47561])
tensor([[0, 0, 0],
        [0, 0, 1],
        [0, 1, 1],
        [1, 1, 2],
        [1, 2, 9]])
tensor([1, 1, 2, 9, 4])


In [12]:
# Split data
n1 = int(0.8 * len(X))   # 80% mark
n2 = int(0.9 * len(X))   # 90% mark

Xtr, Ytr = X[:n1], Y[:n1]           # training
Xval, Yval = X[n1:n2], Y[n1:n2]     # validation
Xtest, Ytest = X[n2:], Y[n2:]       # test

print(f"Train: {Xtr.shape[0]}, Val: {Xval.shape[0]}, Test: {Xtest.shape[0]}")

Train: 38048, Val: 4756, Test: 4757


In [31]:
# Retrain with embedding=5 (our best model)
C  = torch.randn(27, 5)
W1 = torch.randn(15, 100)
b1 = torch.randn(100)
W2 = torch.randn(100, 27)
b2 = torch.randn(27)

parameters = [C, W1, b1, W2, b2]
for p in parameters:
    p.requires_grad = True

for i in range(20000):
    ix = torch.randint(0, Xtr.shape[0], (32,))

    emb = C[Xtr[ix]]
    h = torch.tanh(emb.view(-1, 15) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytr[ix])

    for p in parameters:
        p.grad = None
    loss.backward()

    lr = 0.1 if i < 10000 else 0.01

    for p in parameters:
        p.data += -lr * p.grad

    if i % 3000 == 0:
        print(f"Step {i}, Loss: {loss.item():.4f}")

Step 0, Loss: 19.5259
Step 3000, Loss: 2.1002
Step 6000, Loss: 2.7268
Step 9000, Loss: 1.8678
Step 12000, Loss: 2.3716
Step 15000, Loss: 1.9015
Step 18000, Loss: 2.1609


In [32]:
# Evaluate
emb = C[Xtr]
h = torch.tanh(emb.view(-1, 15) @ W1 + b1)  # 15!
logits = h @ W2 + b2
train_loss = F.cross_entropy(logits, Ytr)

emb = C[Xval]
h = torch.tanh(emb.view(-1, 15) @ W1 + b1)  # 15!
logits = h @ W2 + b2
val_loss = F.cross_entropy(logits, Yval)

print(f"Train loss: {train_loss.item():.4f}")
print(f"Val loss: {val_loss.item():.4f}")

Train loss: 2.1685
Val loss: 2.6352


In [34]:
for _ in range(15):
    name = []
    context = [0] * block_size

    while True:
        emb = C[torch.tensor([context])]
        h = torch.tanh(emb.view(-1, 15) @ W1 + b1)
        logits = h @ W2 + b2
        probs = F.softmax(logits, dim=1)
        ix = torch.multinomial(probs, num_samples=1).item()

        if ix == 0:
            break
        name.append(itos[ix])
        context = context[1:] + [ix]

    print(''.join(name))


mesh
disa
balharnos
amnati
prevesheesh
gosisuda
mehi
kmamadur
marekharana
dapala
ampa
nash
mana
itmulaltir
hreshmeetwrer


In [35]:
emb = C[Xtest]
h = torch.tanh(emb.view(-1, 15) @ W1 + b1)
logits = h @ W2 + b2
test_loss = F.cross_entropy(logits, Ytest)

print(f"Train loss: {train_loss.item():.4f}")
print(f"Val loss: {val_loss.item():.4f}")
print(f"Test loss: {test_loss.item():.4f}")

Train loss: 2.1685
Val loss: 2.6352
Test loss: 3.0379
