In [253]:
import torch
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [254]:
# extract all names from names.txt
names = open("names.txt", "r")
names = names.read().split('\n')
names[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

In [255]:
# extract all trigrams as tuples
trigrams = []
for word in names:
    word = ['.'] + list(word) + ['.']
    # combines multiple iterables element-by-element into tuples
    # zip(iterable1, iterable2, iterable3)
    for ch1, ch2, ch3 in zip(word, word[1:], word[2:]):
        trigrams.append((ch1, ch2, ch3))

In [256]:
# create dataset where first 2 characters in the tuple are inputs (x), last character is output (y)
inputs = [ch1 + ch2 for (ch1, ch2, _ch3) in trigrams]
labels = [ch3 for (_ch1, _ch2, ch3) in trigrams]

In [257]:
# convert characters to integer mappings
N = 27
chars = (sorted(set(''.join(inputs))))
bigrams = []
for ch1 in chars:
    for ch2 in chars:
        bigrams.append(ch1 + ch2)

input_to_i = {bigram: i for i, bigram in enumerate(bigrams)}
output_to_i = {ch: i for i, ch in enumerate(chars)}
i_to_output = {i: ch for i, ch in enumerate(chars)}
print(i_to_output)

{0: '.', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


In [258]:
import torch.nn.functional as F
# one hot encode inputs
x = [input_to_i[input] for input in inputs]
x = torch.tensor(x)
x = F.one_hot(x, num_classes=N*N).float()

In [259]:
# neural net that maps one_hot encoded inputs to a probability distribution for the next character in the sequence
W = torch.randn((N*N, N), requires_grad=True)
y = torch.tensor([output_to_i[ch] for ch in labels], dtype=torch.long)

In [None]:
# train model
for epoch in range(1000):
    # forward pass
    logits = x @ W
    counts = logits.exp()
    probs = counts / counts.sum(dim=1, keepdim=True)
    # calculate loss
    loss = -probs[torch.arange(len(probs)), y].log().mean() + 0.1*(W**2).mean()
    # backpropagate
    loss.backward()
    #optimize, dont add to comp graph for this calculation
    with torch.no_grad():
        W -= 10*W.grad
        W.grad.zero_()

print(loss.item())

2.2989392280578613


In [448]:
for name in range(100):    
    out = []
    bigram = '..'
    ix = 1
    T = 0.8
    while True:
        # get probability distribution using forward pass through neural net
        xenc = F.one_hot(torch.tensor([input_to_i[bigram]]), num_classes=N*N).float()
        logits = (xenc @ W)
        if bigram != '..':
            logits = logits / T
        counts = logits.exp()
        p = counts / counts.sum(dim=1, keepdim=True)
        ix = torch.multinomial(p, num_samples=1, replacement=True).item()
        out.append(i_to_output[ix])
        bigram = bigram[1] + i_to_output[ix]
        if ix == 0:
            break

    print(''.join(out))

brah.
vana.
palan.
olhlvee.
bewjoric.
areyanne.
yosellarian.
xaurggia.
vin.
chen.
yanichay.
esfq.
belie.
jeon.
man.
vine.
ca.
pree.
quan.
xbbiah.
quaryah.
ya.
yanshipmrqfyn.
vi.
ter.
coraveren.
chea.
vanne.
sulin.
uyw.
vienihilee.
va.
carldyn.
na.
cswxmwwcznrona.
rah.
en.
isen.
xafftkuwashiya.
na.
taid.
qsffydena.
no.
vi.
yia.
valee.
hayan.
gavon.
vippcra.
esea.
mon.
vio.
jaqoe.
xai.
cani.
parstray.
vytdurie.
th.
maana.
ca.
xzaxlvan.
vane.
britlley.
phannellia.
zyre.
xr.
orrish.
sha.
wenn.
via.
iz.
chia.
valliraymiyarmjcfey.
yavibbie.
wavi.
xanie.
ilyn.
vexkjcfcrzelian.
wane.
dellexjlmjhus.
xaanarahmarmon.
yanny.
vistilana.
bettenna.
marayah.
vellandawn.
yah.
yanna.
uina.
vvalyanjudtw.
xwkyzuzarsquin.
caster.
esal.
lingez.
sebwuandzn.
yannel.
vgyttlynna.
bree.
sadea.
jane.
