In [61]:
import numpy
import torch
import torch.nn.functional as F

# Load the corpus: every line of the file becomes one entry of `words`.
# The context manager guarantees the file handle is closed, which the bare
# open(...).read() it replaces did not.
with open("../names.txt", "r") as names_file:
    words = names_file.read().splitlines()

# Bigram count table: N[a, b] is how often symbol b directly follows symbol a.
# NOTE(review): the 33x33 size is hard-coded; it must be at least
# len(characters) + 1 for the indexing below to succeed -- confirm against
# the actual data file.
N = torch.zeros((33, 33), dtype=torch.int32)

# Vocabulary: every distinct character in the corpus gets an index starting
# at 1; index 0 is reserved for '.', the start/end-of-word marker.
characters = sorted(set(''.join(words)))
stoi = {s:i+1 for i, s in enumerate(characters)}
stoi['.'] = 0
itos = {i:s for s, i in stoi.items()}

# Count every adjacent character pair, including the transition from the
# start marker to the first character and from the last character to the
# end marker.
for w in words:
    chars = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chars, chars[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        N[ix1, ix2] += 1

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Draw the bigram count matrix as a heat-map and annotate each cell with the
# character pair it stands for (upper text) and its count (lower text).
# The loop bounds come from N itself rather than a repeated literal, so the
# annotation grid always matches the matrix being shown.
# NOTE(review): itos must contain every row/column index of N, otherwise the
# label lookup raises KeyError -- confirm the vocabulary size matches N.
n_rows, n_cols = N.shape

plt.figure(figsize=(16,16))
plt.imshow(N, cmap='Blues')
for i in range(n_rows):
    for j in range(n_cols):
        chstr = itos[i] + itos[j]
        plt.text(j, i, chstr, ha='center', va='bottom', color='gray')
        plt.text(j, i, N[i, j].item(), ha='center', va='top', color='gray')
# Trailing semicolon keeps the return value of plt.axis out of the cell output.
plt.axis('off');

In [None]:
# Turn counts into per-row probability distributions. Adding 1 to every count
# (add-one smoothing) ensures no transition has probability zero.
P = (N + 1).float()
P = P / P.sum(dim=1, keepdim=True)

# Sample 20 words from the bigram table with a fixed seed. Each word starts
# from index 0 (the '.' marker) and ends as soon as index 0 is drawn again;
# the terminating '.' is part of the printed string.
generator = torch.Generator().manual_seed(2147483647)
for _ in range(20):
    sampled = ''
    ix = 0
    while True:
        next_dist = P[ix]
        drawn = torch.multinomial(
            next_dist, num_samples=1, replacement=True, generator=generator
        )
        ix = drawn.item()
        sampled += itos[ix]
        if ix == 0:
            break
    print(sampled)


In [None]:
# Evaluate the bigram table P on the training corpus: accumulate the log
# probability of every observed bigram (start/end markers included) and
# report the total, its negation, and the per-bigram average.
log_likelihood = 0.0
n = 0
for w in words:
    # Pairing '.'+w with w+'.' walks the same bigrams as padding the word on
    # both sides and zipping it against itself shifted by one.
    for ch1, ch2 in zip('.' + w, w + '.'):
        log_likelihood = log_likelihood + P[stoi[ch1], stoi[ch2]].log()
        n = n + 1

print(f'Log likelihood: {log_likelihood:.4f}')
nll = -log_likelihood
print(f'Negative log likelihood: {nll:.4f}')
print(f'Normalized negative log likelihood: {nll/n:.4f}')

In [None]:

# Build (input, target) index pairs for the first word only and print each
# character pair, as a small worked example of the dataset layout.
xs, ys = [], []

for w in words[:1]:
    padded = '.' + w + '.'
    for ch1, ch2 in zip(padded, padded[1:]):
        src, dst = stoi[ch1], stoi[ch2]
        print(ch1, ch2)
        xs.append(src)
        ys.append(dst)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
        


In [None]:

# Build the full training set: for every bigram in every word, xs holds the
# index of the current character and ys the index of the one that follows.
xs, ys = [], []
for w in words:
  padded = '.' + w + '.'
  xs.extend(stoi[ch] for ch in padded[:-1])
  ys.extend(stoi[ch] for ch in padded[1:])
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.numel()
print('number of examples: ', num)

# Single weight matrix of the network, drawn from a standard normal with a
# fixed seed and tracked by autograd.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((33, 33), generator=g, requires_grad=True)

In [None]:

# Gradient-descent training of the single-layer bigram network.
#
# The one-hot encoding of the inputs and the row index used to pick out the
# target probabilities never change between steps, so both are built once
# here instead of on every iteration.
num_steps = 5000
log_every = 500  # print the loss only every `log_every` steps (and on the last)

xenc = F.one_hot(xs, num_classes=33).float()
example_idx = torch.arange(num)

for k in range(num_steps):
  # The learning rate is kept NEGATIVE on purpose: the update below ADDS
  # learning_rate * grad, so a negative value moves W against the gradient.
  # After step 1000 its magnitude shrinks linearly with k.
  learning_rate = -50
  if k > 1000:
    learning_rate += k * 0.005

  # Forward pass: softmax over the logits, written out explicitly.
  logits = xenc @ W
  counts = logits.exp()
  probs = counts / counts.sum(1, keepdims=True)
  # Mean negative log-likelihood of the targets plus a small L2 penalty on W.
  loss = -probs[example_idx, ys].log().mean() + 0.00001 * (W**2).mean()
  if k % log_every == 0 or k == num_steps - 1:
    print(loss.item())

  # Backward pass: clear the stale gradient, then recompute it.
  W.grad = None
  loss.backward()

  # Parameter update, done without autograd tracking.
  with torch.no_grad():
    W += learning_rate * W.grad

In [None]:
import torch.nn.functional as F

# Walk through the first five training bigrams and show, step by step, how
# the loss is assembled from the network's output probabilities.
# NOTE: `probs` is whatever the most recent forward pass over the full
# training set left behind, so this cell must run after the training cell.
separator = "----------------"
nlls = torch.zeros(5)
for i in range(5):
    x, y = xs[i].item(), ys[i].item()
    print(separator)
    print(f"bigram example {i+1}: {itos[x]}{itos[y]} (index {x}, {y})")
    print("input to the neural net: ", x)
    print("output probabilities from the neural net: ", probs[i])
    print("label (actual next character): ", y)
    target_prob = probs[i, y]
    print("probability assigned to the actual next character: ", target_prob.item())
    target_logprob = target_prob.log()
    print("log likelihood: ", target_logprob.item())
    example_nll = -target_logprob
    print("negative log likelihood: ", example_nll.item())
    nlls[i] = example_nll
print(separator)
print("average negative log likelihood, i.e. loss =  ", nlls.mean().item())

In [None]:
# Sample 10 words from the trained network with a fixed seed. Each word
# starts at index 0 (the '.' marker) and ends when index 0 is drawn again.
# Only results longer than 3 symbols (terminating '.' included) are printed.
g = torch.Generator().manual_seed(2147483647)

# Sampling is pure inference: disable autograd so no graph through W is
# recorded for every generated character.
with torch.no_grad():
  for i in range(10):

    out = []
    ix = 0
    while True:
      # Forward pass for one character: one-hot -> logits -> softmax.
      xenc = F.one_hot(torch.tensor([ix]), num_classes=33).float()
      logits = xenc @ W
      counts = logits.exp()
      p = counts / counts.sum(1, keepdims=True)

      ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
      out.append(itos[ix])
      if ix == 0:
        break
    if len(out) > 3:
      print(''.join(out))
