In [1]:
# Minimal re-implementation of just the neural-network bigram model, kept for reference

import torch
import torch.nn.functional as F

# Read the dataset, one entry per line. The context manager guarantees the
# file handle is closed instead of leaving it open until garbage collection.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

# Vocabulary: every distinct character that appears in the dataset, sorted.
chars = sorted(set("".join(words)))
# Index 0 is reserved for the '.' boundary token, so real characters start at 1.
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi["."] = 0
# Reverse lookup: integer index -> character.
itos = {i: s for s, i in stoi.items()}

In [2]:
# Build the bigram training set: xs[k] is the index of a character and
# ys[k] is the index of the character that immediately follows it.
xs, ys = [], []

for word in words:
    # Pad with '.' on both sides so that "start of word -> first letter" and
    # "last letter -> end of word" are recorded as bigrams too.
    padded = ['.'] + list(word) + ['.']
    for current_ch, next_ch in zip(padded, padded[1:]):
        xs.append(stoi[current_ch])
        ys.append(stoi[next_ch])

# Convert the accumulated index lists into tensors.
xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [3]:
# Dedicated, explicitly seeded generator so the weight initialisation is reproducible.
g = torch.Generator()
g.manual_seed(2147483647)

# Single 27x27 weight matrix drawn from a standard normal; flagged as a leaf
# that requires gradients so autograd can populate W.grad.
W = torch.randn(27, 27, generator=g).requires_grad_()

In [28]:
# gradient descent
#
# Note: W is not re-initialised here, so re-running this cell continues
# training from the current weights.

n_iter = 10
l_rate = 50

# The inputs never change between iterations, so one-hot encode them once
# instead of rebuilding the same tensor on every step.
x_oh = F.one_hot(xs, num_classes=27).float() # converted to float so it can be matrix-multiplied with W

for i in range(n_iter):
    #forward
    logits = x_oh @ W # log-counts for each next letter
    counts = logits.exp() # proper counts for each next letter
    probs = counts/counts.sum(1, keepdim=True) # normalized probabilities for each next letter
    # mean negative log-likelihood of the true next letter (vectorized)
    loss = -probs[torch.arange(probs.size(0)), ys].log().mean()

    #backward
    W.grad = None # reset so gradients from the previous iteration do not accumulate
    loss.backward()

    #update
    # Apply the step with autograd disabled rather than mutating W.data,
    # which bypasses autograd's bookkeeping.
    with torch.no_grad():
        W -= l_rate * W.grad
print(loss.item())

# final loss: ~2.46

2.475414276123047


In [30]:
# Shape check on the logits left over from the last forward pass.
logits.shape

torch.Size([228146, 27])

In [None]:
n_names = 5

# Sampling is inference only: disable autograd so that no computation graph
# is built through W on every step. The sampled values are unaffected.
with torch.no_grad():
    for i in range(n_names):
        w = []
        ix = 0 # index 0 is the '.' token, used as the start-of-word context
        while True:
            # Same forward pass as in training, for a single input character.
            x_oh = F.one_hot(torch.tensor([ix]), num_classes=27).float()
            logits = x_oh @ W
            counts = logits.exp()
            probs = counts/counts.sum(1, keepdim=True)

            # Draw the next character index from the predicted distribution.
            ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
            w.append(itos[ix])
            if ix == 0:
                # '.' was sampled again: end of word (the '.' is kept in the output).
                break
        print(''.join(w))

mbenda.
daleleansah.
alarynermiabr.
sho.
traelela.
