In [3]:
import random

import torch
import torch.nn.functional as F
from tqdm import tqdm

In [7]:
# Load the name list, one name per line. The context manager closes the file
# handle as soon as the contents have been read.
with open('data/names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [None]:
# Vocabulary: '.' is placed at index 0, followed by every distinct character
# that occurs in the names, in sorted order.
chars = ['.', *sorted(set(''.join(words)))]

In [13]:
# Character -> integer index, following the order of `chars`.
stoi = dict(zip(chars, range(len(chars))))
# Integer index -> character: the inverse of `stoi`.
itos = dict(zip(stoi.values(), stoi.keys()))

In [558]:
## Hyperparameters
# These three values fix the shapes of the model parameters created below.
block_size = 4  # context length: how many preceding character indices form one input example
n_dim = 15  # width of each character's embedding vector (number of columns of the lookup table C)
layer_size = 500  # number of units in the tanh hidden layer

In [559]:
# Build (context -> next character) examples from the names and split them
# 80% / 10% / 10% into train / validation / test.

SPLIT_SEED = 42  # fixed so the shuffle, and therefore the split, is reproducible

# Shuffle a copy with a private RNG: `words` itself is left untouched, so
# re-running this cell always yields exactly the same dataset.
shuffled_words = list(words)
random.Random(SPLIT_SEED).shuffle(shuffled_words)

X, Y = [], []
for w in shuffled_words:
    # Every name starts from a context made entirely of index 0.
    context = [0] * block_size
    # The appended '.' makes the model learn where a name ends.
    for ch in w + '.':
        X.append(context)
        Y.append(stoi[ch])
        # Slide the window: drop the oldest index, append the newest.
        context = context[1:] + [stoi[ch]]
X = torch.tensor(X)
Y = torch.tensor(Y)

# Boundaries are expressed in number of examples (rows of X), not names.
n_examples = X.shape[0]
train_split = int(n_examples * .8)
val_split = int(.9 * n_examples)

train_X = X[:train_split]
train_Y = Y[:train_split]
val_X = X[train_split:val_split]
val_Y = Y[train_split:val_split]
test_X = X[val_split:]
test_Y = Y[val_split:]

In [560]:
# Model parameters: an embedding table followed by a two-layer MLP.

# Seed torch's global RNG so the initial weights (and, in a top-to-bottom run,
# the random draws made by later cells) are repeatable.
torch.manual_seed(2147483647)

# Derive the vocabulary size from the character list instead of hard-coding it.
vocab_size = len(chars)

C = torch.randn((vocab_size, n_dim), requires_grad=True)  # one n_dim-wide row per character
w1 = torch.randn((n_dim * block_size, layer_size), requires_grad=True)  # flattened context -> hidden
b1 = torch.randn((layer_size), requires_grad=True)
w2 = torch.randn((layer_size, vocab_size), requires_grad=True)  # hidden -> one logit per character
b2 = torch.randn((vocab_size), requires_grad=True)
parameters = [C, w1, b1, w2, b2]

In [615]:
# Mini-batch gradient descent on the cross-entropy loss.
num_epochs = 20000  # number of parameter updates; each iteration uses one random mini-batch
lr = 0.0001
batch_size = 64
for step in tqdm(range(num_epochs)):
    # Draw a random mini-batch of row indices (sampled with replacement).
    ix = torch.randint(0, train_X.shape[0], (batch_size, ))

    # Forward pass: embed the context, flatten it, hidden tanh layer, output logits.
    enc = C[train_X[ix]]
    h = torch.tanh(torch.matmul(enc.view(-1, block_size * n_dim), w1) + b1)
    logits = torch.matmul(h, w2) + b2
    loss = F.cross_entropy(logits, train_Y[ix])

    # Backward pass: clear stale gradients first so they do not accumulate.
    for p in parameters:
        p.grad = None
    loss.backward()

    # Gradient step. Updating in place under no_grad() keeps the step out of
    # the autograd graph without reaching around autograd through `.data`.
    with torch.no_grad():
        for p in parameters:
            p -= lr * p.grad

100%|███████████████████████████████████████████████████████████████████| 20000/20000 [00:03<00:00, 5205.67it/s]


In [616]:
# Train Loss
# Evaluation only: no_grad() avoids recording an autograd graph for a forward
# pass over the entire training split.
with torch.no_grad():
    enc = C[train_X]
    h = torch.tanh(torch.matmul(enc.view(-1, block_size * n_dim), w1) + b1)
    logits = torch.matmul(h, w2) + b2
    loss = F.cross_entropy(logits, train_Y)
loss.item()

1.9902480840682983

In [617]:
# Val Loss
# Evaluation only: no_grad() avoids recording an autograd graph for a forward
# pass over the entire validation split.
with torch.no_grad():
    enc = C[val_X]
    h = torch.tanh(torch.matmul(enc.view(-1, block_size * n_dim), w1) + b1)
    logits = torch.matmul(h, w2) + b2
    loss = F.cross_entropy(logits, val_Y)
loss.item()

2.130387783050537

In [618]:
# Test Loss
# Evaluation only: no_grad() avoids recording an autograd graph for a forward
# pass over the entire test split.
with torch.no_grad():
    enc = C[test_X]
    h = torch.tanh(torch.matmul(enc.view(-1, block_size * n_dim), w1) + b1)
    logits = torch.matmul(h, w2) + b2
    loss = F.cross_entropy(logits, test_Y)
loss.item()

2.134659767150879

In [622]:
# Generate names by sampling one character at a time from the model.
num_samples = 10
# Pure inference: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    for _ in range(num_samples):
        # Start from a context made entirely of index 0.
        context = [0] * block_size
        letters = []
        while True:
            enc = C[torch.tensor(context)]
            h = torch.tanh(torch.matmul(enc.view(1, -1), w1) + b1)
            logits = torch.matmul(h, w2) + b2
            probs = logits.softmax(dim=1)
            # Draw the next character index from the predicted distribution.
            ix = torch.multinomial(probs, 1).item()
            context = context[1:] + [ix]
            # Index 0 marks the end of the name.
            if ix == 0:
                break
            letters.append(itos[ix])
        word = ''.join(letters)
        print(word)

travin
herra
pair
kafs
andin
mical
kaylan
deira
kya
aiab
