In [10]:
import torch

# Create training data: every adjacent character pair (bigram) in every word
# becomes one (input index, target index) example.
# The context manager closes the file handle as soon as the names are read.
with open('./makemore-rs/names.txt') as f:
    words = f.read().splitlines()

# Vocabulary: all distinct characters in the corpus, sorted, numbered from 1.
# Index 0 is reserved for "." which marks both the start and the end of a word.
chars = sorted(set("".join(words)))
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi["."] = 0
itos = {i: s for s, i in stoi.items()}

# create the dataset
xs, ys = [], []
for w in words:
    # Pad with "." on both sides so the first and last characters get examples too.
    chs = ["."] + list(w) + ["."]
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        xs.append(ix1)
        ys.append(ix2)
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print("number of examples: ", num)

# initialize the 'network': a single square weight matrix with one row per
# input character and one column per candidate next character.
# The size follows the vocabulary (letters plus the "." token) instead of
# being hard-coded. The fixed seed keeps the initial weights reproducible.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((len(stoi), len(stoi)), generator=g, requires_grad=True)

number of examples:  228146


In [16]:
import torch.nn.functional as F

# gradient descent (full batch) on the negative log-likelihood of the bigrams,
# plus an L2 penalty that pulls the weights toward zero.
num_steps = 100  # number of full-batch updates
learning_rate = 50  # step size applied to the gradient
reg_strength = 0.01  # weight of the mean-squared-weight penalty

# input to the network: one-hot encoding.
# It depends only on xs (never on W), so it is built once, outside the loop.
# The class count is read from W so the encoding always matches its row count.
xenc = F.one_hot(xs, num_classes=W.shape[0]).float()
example_idx = torch.arange(num)  # row selector for picking each target's probability

for k in range(num_steps):

    # forward pass
    logits = xenc @ W  # predict log-counts
    counts = logits.exp()  # counts, equivalent to N
    probs = counts / counts.sum(1, keepdims=True)  # probabilities for next character
    loss = -probs[example_idx, ys].log().mean() + reg_strength * (W**2).mean()
    print(loss.item())

    # backward pass
    W.grad = None  # reset the gradient so values from the previous step do not accumulate
    loss.backward()

    # update: modify the leaf tensor in place with autograd recording disabled,
    # rather than going through `.data`, which hides the write from autograd.
    with torch.no_grad():
        W += -learning_rate * W.grad

2.48717999458313
2.487077474594116
2.4869771003723145
2.4868791103363037
2.486783027648926
2.4866888523101807
2.4865968227386475
2.486506938934326
2.4864184856414795
2.486332654953003
2.4862477779388428
2.4861648082733154
2.486083745956421
2.4860036373138428
2.4859261512756348
2.485849618911743
2.485774517059326
2.485701560974121
2.4856293201446533
2.48555850982666
2.4854891300201416
2.4854214191436768
2.4853551387786865
2.4852893352508545
2.4852259159088135
2.4851627349853516
2.485100746154785
2.4850404262542725
2.484980821609497
2.4849226474761963
2.484865427017212
2.484809160232544
2.4847536087036133
2.484699249267578
2.4846460819244385
2.4845938682556152
2.4845426082611084
2.4844918251037598
2.4844422340393066
2.48439359664917
2.4843459129333496
2.4842989444732666
2.4842522144317627
2.4842066764831543
2.4841620922088623
2.4841182231903076
2.484074592590332
2.484032392501831
2.48399019241333
2.4839489459991455
2.4839086532592773
2.4838690757751465
2.483829975128174
2.483790874481201

In [17]:
# finally, sample from the 'neural net' model
# A fresh, fixed-seed generator makes the sampled names reproducible.
g = torch.Generator().manual_seed(2147483647)

for i in range(5):

    out = []
    ix = 0  # every name starts from index 0
    while True:

        # ----------
        # BEFORE:
        # p = P[ix]
        # ----------
        # NOW: run the current index through the network to get its
        # next-character distribution. Sampling needs no gradients, so the
        # forward pass runs with autograd recording disabled.
        with torch.no_grad():
            xenc = F.one_hot(torch.tensor([ix]), num_classes=W.shape[0]).float()
            logits = xenc @ W  # predict log-counts
            counts = logits.exp()  # counts, equivalent to N
            p = counts / counts.sum(1, keepdims=True)  # probabilities for next character
        # ----------

        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
        out.append(itos[ix])
        # Drawing index 0 again ends the current name.
        if ix == 0:
            break
    print("".join(out))
    

cexze.
momasurailezityha.
konimittain.
llayn.
ka.
