In [1]:
import torch
import torch.nn.functional as F


In [2]:
words = open('names.txt', 'r').read().splitlines()

In [3]:
# char to integer mapping
chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi["."] = 0 

# integer to char mapping
itos = {i:s for s,i in stoi.items()}

## Note:
- in the NN version, the input is the first letter, the output is the second letter
- possible outputs are 27 (27 Neurons in the layer)
- thus a `softmax` will be used at the output layer


In [4]:
xs, ys = [], []

In [5]:
for word in words[:1]:
    word = "." + word + "."
    for ch1, ch2 in zip(word, word[1:]):
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])


In [6]:
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()

xenc = F.one_hot(xs, num_classes=27).to(torch.float32)
xenc.shape


torch.Size([5, 27])

In [7]:
g = torch.Generator().manual_seed(2147483647)
w = torch.randn((27, 27), requires_grad=True)
w

tensor([[-2.4417e-01,  1.3433e-01,  8.4862e-01, -3.0046e-03,  1.0235e+00,
          2.5525e-01, -7.2576e-01,  9.8405e-01, -8.3659e-01, -6.4309e-01,
         -1.2482e+00,  8.4320e-02,  5.0813e-01,  4.1696e-01, -6.0930e-01,
         -1.0574e-01, -1.3805e-01,  6.7191e-01,  2.2708e-01,  5.9487e-01,
          1.0616e+00,  1.2016e+00, -3.8013e-01, -2.0156e+00,  1.6827e+00,
          4.8591e-01, -5.2556e-02],
        [ 8.4453e-01, -9.9432e-02, -1.0253e+00,  6.9441e-02, -2.0995e+00,
          1.9292e+00, -1.7391e+00, -9.5256e-01,  3.6268e-03,  3.0155e-01,
          1.4338e+00,  2.0275e+00, -2.6807e+00, -8.2516e-01,  1.7354e-02,
          7.8567e-01,  5.1407e-01, -1.2878e+00,  7.0497e-02,  2.6389e-01,
         -8.8480e-01, -1.5451e+00, -5.7936e-02,  1.0274e-01,  9.7375e-01,
          1.1783e+00, -1.2412e-01],
        [-1.4523e+00, -1.9335e-01,  1.4256e+00,  5.5516e-01,  2.3064e+00,
          1.3092e-01, -6.7584e-01, -1.3463e+00, -5.7315e-01, -2.1701e+00,
          8.4077e-01,  1.4423e+00, -1.14

In [8]:
softmax = torch.nn.Softmax(dim=1)

# putting every thing together
for k in range(10):
  # forward pass
  logits = xenc @ w
  y_predict = softmax(logits)

  q = y_predict[torch.arange(num), ys]
  loss = sum(-torch.log(q)).mean()
    
  print(loss.item())
  
  # backward pass
  w.grad = None
  loss.backward()
  w.data += -0.1 * w.grad
    

18.91659927368164
18.39274787902832
17.873640060424805
17.3596134185791
16.851016998291016
16.348228454589844
15.851648330688477
15.361690521240234
14.878787994384766
14.403383255004883
