In [287]:
# Load the name corpus, one name per line. The context manager closes the
# file handle as soon as its contents have been read.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
len(words)

32033

In [288]:
# Vocabulary: every distinct character occurring in the names, in sorted order.
chars = sorted(set(''.join(words)))

# Characters are numbered from 1 upwards; index 0 is given to the '.' marker.
str_to_int = dict(zip(chars, range(1, len(chars) + 1)))
str_to_int['.'] = 0

# Reverse lookup: integer index back to its character.
int_to_str = dict(zip(str_to_int.values(), str_to_int.keys()))

In [289]:
# Create the training set of bigrams (x, y): x is the index of a character and
# y is the index of the character that immediately follows it.
import torch
xs, ys = [], []

for w in words:
    # Wrap each name in '.' markers so that "start of name" and "end of name"
    # are themselves bigram contexts. A dedicated name is used here so the
    # vocabulary list `chars` built in the previous cell is not overwritten.
    padded = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(padded, padded[1:]):
        i1 = str_to_int[ch1]
        i2 = str_to_int[ch2]

        xs.append(i1)
        ys.append(i2)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
batch_size = xs.nelement()  # total number of (x, y) pairs
print(f'Num examples: {batch_size}')

Num examples: 228146


In [290]:
import torch.nn.functional as F

# Random generator handed to the weight initialisation below and to the
# sampling cell, so the notebook's randomness flows through one object.
# NOTE(review): no manual_seed call is made here; run-to-run consistency relies
# on the generator's default initial state — confirm this is intended.
g = torch.Generator()

# W plays the role that the count matrix N plays in the counting-based bigram
# model. Multiplying a one-hot row vector by W simply picks out the matching
# row of W (the zeros and ones leave that row untouched); the row is then
# normalised into probabilities.
W = torch.randn(27, 27, generator=g, requires_grad=True)  # random floats drawn from a normal distribution

In [291]:
steps = 500
lr = 50
reg_strength = 0.1  # weight of the squared-weight penalty added to the loss

# The inputs are the same on every step, so build the one-hot matrix and the
# row indices once instead of recomputing them inside the loop.
xenc = F.one_hot(xs, num_classes=27).float()
example_idx = torch.arange(batch_size)

for i in range(steps):
    # Forward pass
    logits = xenc @ W  # Log counts, one pass of a neural net with 1 linear layer
    counts = logits.exp()  # Equivalent to N from normal bigram
    probs = counts / counts.sum(1, keepdim=True)  # Create probabilities
    # Previous 2 lines form a softmax

    # Mean negative log-probability assigned to the true next character, plus
    # a regularization term that pulls W towards zero (smoother distribution).
    loss = -probs[example_idx, ys].log().mean() + reg_strength * (W**2).mean()

    # Backward pass
    W.grad = None  # Zero grads
    loss.backward()

    # Gradient descent: update W in place without autograd recording the step.
    with torch.no_grad():
        W -= lr * W.grad

    if i % 100 == 0:
        print(f'Step {i} with loss {loss.item()}')

# `loss` here is the value from the last forward pass, i.e. it was computed
# before the final weight update was applied.
print(f'Final loss: {loss.item()}')

Step 0 with loss 3.967561960220337
Step 100 with loss 2.587512254714966
Step 200 with loss 2.5863962173461914
Step 300 with loss 2.5863640308380127
Step 400 with loss 2.586362600326538
Final loss: 2.586362600326538


In [292]:
names = 10  # how many names to sample

# Sampling is inference only. W was created with requires_grad=True, so
# no_grad stops autograd from recording a graph for every product against it.
with torch.no_grad():
    for _ in range(names):
        letters = []
        ix = 0  # index 0 is the '.' marker, so every name starts from it

        while True:
            # Same forward pass as in training, for a single input index.
            xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()
            logits = xenc @ W

            counts = logits.exp()  # Softmax
            probs = counts / counts.sum(1, keepdim=True)  # Create probabilities

            ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()

            # Drawing the '.' marker ends the name; it is not part of the text.
            if ix == 0:
                break
            letters.append(int_to_str[ix])

        new_name = ''.join(letters)
        print(new_name)

la
m
kamenhbridhahmasgrezari
hdolknrs
beryalon
alyn
lio
rmelycin
aneijuelli
trkarinayneviefpienawwanahinahiesjoamdn
