### Lesson 1

In [1]:
import torch
import torch.nn.functional as F
# Load the corpus as a list of lines (line terminators removed by splitlines).
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until the object happens to be garbage collected.
with open('names.txt') as f:
    words = f.read().splitlines()

In [2]:
# Build the character <-> integer lookup tables.
# Every distinct character found in `words` is numbered from 1 in sorted
# order; index 0 is then assigned to '.', which the dataset code uses as the
# start/end marker around each word.
chars = sorted(set(''.join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
# Inverse mapping: integer index -> character.
itos = {idx: ch for ch, idx in stoi.items()}

In [5]:
# create the dataset: one (current char index, next char index) pair per bigram
xs, ys = [], []
for w in words:
    # Surround the word with '.' markers so that the first and last
    # transitions of every word are part of the data as well.
    idxs = [stoi[ch] for ch in ['.'] + list(w) + ['.']]
    xs.extend(idxs[:-1])  # every position except the last is an input
    ys.extend(idxs[1:])   # the target is the character that follows it

xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()  # total number of bigram examples
print('Number of examples: ', num)

# init 'network': a single 27x27 weight matrix drawn from a standard normal,
# using a dedicated seeded generator so the initial weights are reproducible.
# NOTE(review): the seed 21427483647 looks like it may be a typo of
# 2147483647 (2**31 - 1) -- confirm; left as-is so recorded outputs still match.
g = torch.Generator()
g.manual_seed(21427483647)
W = torch.randn(27, 27, generator=g).requires_grad_()

Number of examples:  228146


In [7]:
# gradient descent
# The inputs never change between iterations, so the one-hot encoding and the
# row index used to pick out the target probabilities are built once here
# rather than being re-allocated on each of the 100 steps.
xenc = F.one_hot(xs, num_classes=27).float() # input to network
rows = torch.arange(num)

for k in range(100):
    # forward pass
    logits = xenc @ W # predict log-counts
    # Softmax from scratch
    counts = logits.exp() # counts, equivalent to N
    probs = counts / counts.sum(1, keepdim=True) # probs for next char
    # mean negative log-probability assigned to the true next char,
    # plus a penalty of 0.1 times the mean squared weight
    loss = -probs[rows, ys].log().mean() + 0.1*(W**2).mean()
    print(loss.item())

    # backward pass
    W.grad = None # same as setting gradients to 0
    loss.backward()

    # update
    # lr is negative and the step is *added*, so the sign carries the
    # descent direction. The in-place update runs under no_grad() so it is
    # not recorded by autograd, without reaching around it through W.data.
    lr = -50
    with torch.no_grad():
        W += lr*W.grad

3.7688002586364746
3.3679373264312744
3.161724090576172
3.0385050773620605
2.9520912170410156
2.8889198303222656
2.841514825820923
2.80497145652771
2.7760958671569824
2.7528128623962402
2.7337334156036377
2.717888116836548
2.704572916030884
2.6932666301727295
2.683574914932251
2.6751952171325684
2.6678903102874756
2.661473274230957
2.6557962894439697
2.650740623474121
2.6462109088897705
2.6421310901641846
2.6384379863739014
2.6350808143615723
2.6320178508758545
2.6292130947113037
2.6266374588012695
2.62426495552063
2.622074604034424
2.6200480461120605
2.618168592453003
2.616422176361084
2.6147966384887695
2.613281488418579
2.6118667125701904
2.6105430126190186
2.6093037128448486
2.6081416606903076
2.607051372528076
2.6060264110565186
2.605062246322632
2.604154348373413
2.6032984256744385
2.6024913787841797
2.601729393005371
2.6010091304779053
2.600328207015991
2.599684476852417
2.599074363708496
2.598496437072754
2.597949266433716
2.5974299907684326
2.5969369411468506
2.596468687057495

In [9]:
# Sampling
# Draw 5 sequences from the model with a dedicated seeded generator so the
# samples are reproducible.
g = torch.Generator().manual_seed(12)

# W was created with requires_grad=True; sampling needs no gradients, so
# autograd is disabled to avoid recording a graph on every step.
with torch.no_grad():
    for i in range(5):
        out = []
        ix = 0 # index 0 is '.', the start/end marker
        while True:
            # same forward pass as in training, for a single input index
            xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()
            logits = xenc @ W
            counts = logits.exp()
            p = counts / counts.sum(1, keepdim=True)

            # draw the next character index from the predicted distribution
            ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
            out.append(itos[ix])
            if ix == 0: break # sampled the end marker: sequence is complete

        print(''.join(out))

intterikonaly.
kelayryn.
li.
bkanewbrgesllgfyo.
d.
