In [78]:
# Load the dataset as a list of lines (one entry per line, newlines stripped).
# The context manager closes the file handle deterministically instead of
# leaving it open until the garbage collector gets to it.
with open("../names.txt", "r") as names_file:
    words = names_file.read().splitlines()

In [None]:
# Display the first ten entries of `words`.
words[:10]

In [80]:
# Tally every pair of adjacent symbols in each word, with '<S>' and '<E>'
# sentinels marking the start and the end of the word.
b = {}
for w in words:
    chars = ['<S>', *w, '<E>']
    for bigram in zip(chars, chars[1:]):
        b[bigram] = b.get(bigram, 0) + 1

In [None]:
# Bigrams ordered from most to least frequent; the sort is stable, so pairs
# with equal counts keep their insertion order.
sorted(b.items(), key=lambda pair: -pair[1])

In [82]:
import torch
import numpy

In [83]:
# 3x5 tensor of int32 zeros.
a = torch.zeros(3, 5, dtype=torch.int32)

In [None]:
# Display the tensor `a`.
a

In [None]:
# Display the element type of `a`.
a.dtype

In [86]:
# Unique characters across all words, in order of first appearance.
# dict.fromkeys de-duplicates while preserving insertion order, which replaces
# the manual "append if not already in the list" loop and its repeated linear
# membership scans.
chars = list(dict.fromkeys(''.join(words)))

In [None]:
# Number of entries currently held in `chars`.
len(chars)

In [88]:
# Bigram count matrix: one row/column per distinct character in the data plus
# one extra slot (index 0) for the '.' boundary token. The size is derived from
# the data instead of a hard-coded literal so it always matches the vocabulary.
vocab_size = len(set(''.join(words))) + 1
N = torch.zeros((vocab_size, vocab_size), dtype=torch.int32)

In [89]:
# Vocabulary lookup tables. The distinct characters are numbered from 1 in
# sorted order; index 0 is assigned to the '.' boundary token. `itos` is the
# inverse mapping (index -> symbol).
characters = sorted(set(''.join(words)))
stoi = {symbol: index for index, symbol in enumerate(characters, start=1)}
stoi['.'] = 0
itos = {index: symbol for symbol, index in stoi.items()}

In [90]:
# Count bigram occurrences into N, using '.' as the start/end marker.
# N is cleared first so that re-running this cell does not double the counts.
N.zero_()
for w in words:
    # Named `chs` (not `chars`) so the list of unique characters built earlier
    # in the notebook is not overwritten with a per-word sequence.
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        N[stoi[ch1], stoi[ch2]] += 1

In [91]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Heat map of the bigram counts; every cell is annotated with the bigram it
# represents (top) and its count (bottom).
# The grid dimensions are read from N rather than hard-coded, so the plot
# always matches the count matrix.
n_rows, n_cols = N.shape
plt.figure(figsize=(16,16))
plt.imshow(N, cmap='Blues')
for i in range(n_rows):
    for j in range(n_cols):
        chstr = itos[i] + itos[j]
        plt.text(j, i, chstr, ha='center', va='bottom', color='gray')
        plt.text(j, i, N[i, j].item(), ha='center', va='top', color='gray')
# Trailing ';' keeps the return value of plt.axis out of the cell output.
plt.axis('off');

In [None]:
# Shape of the first row of N.
N[0].shape

In [94]:
# Bigram probabilities: add one to every count, convert to float, then divide
# each row by its own sum so that every row sums to 1.
P = (N + 1).float()
P = P / P.sum(dim=1, keepdim=True)


In [None]:
# Sample 20 sequences from the bigram table P with a fixed seed.
generator = torch.Generator().manual_seed(2147483647)
for i in range(20):
    ix, out = 0, []  # every sequence starts from index 0, the '.' token
    # Keep drawing the next symbol until index 0 comes up again.
    while not out or ix != 0:
        p = P[ix]
        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=generator).item()
        out.append(itos[ix])
    print(''.join(out))

In [None]:
# Score the bigram table P on the whole dataset: sum the log-probability of
# every observed bigram, then report the total, its negation, and the
# per-bigram average of the negation.
log_likelihood = 0.0
n = 0
for w in words:
    chs = ['.', *w, '.']
    for ch1, ch2 in zip(chs, chs[1:]):
        prob = P[stoi[ch1], stoi[ch2]]
        log_likelihood += torch.log(prob)
        n += 1
nll = -log_likelihood
print(f'Log likelihood: {log_likelihood:.4f}')
print(f'Negative log likelihood: {nll:.4f}')
print(f'Normalized negative log likelihood: {nll/n:.4f}')

In [None]:
# Training set of bigrams (x, y) built from the first word only:
# x is the index of a character and y is the index of the character after it.
xs, ys = [], []
for w in words[:1]:
    chs = ['.', *w, '.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1, ix2 = stoi[ch1], stoi[ch2]
        print(ch1, ch2)
        xs.append(ix1)
        ys.append(ix2)
xs, ys = torch.tensor(xs), torch.tensor(ys)
        


In [None]:
# Display the input indices (first character of each bigram).
xs

In [None]:
# Display the target indices (second character of each bigram).
ys

In [117]:
import torch.nn.functional as F
# Weight matrix of the single-layer model, drawn from a standard normal with a
# fixed seed. It is square with one row and one column per vocabulary entry;
# the size comes from `stoi` rather than a hard-coded literal so it always
# matches the vocabulary.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((len(stoi), len(stoi)), generator=g, requires_grad=True)


In [125]:
# Forward pass: one-hot inputs -> logits -> exponentiate -> row-normalise
# (i.e. softmax) -> mean negative log-probability of the true next characters.
# The one-hot width follows W and the row index follows xs, instead of the
# literals 33 and 5, so the cell keeps working when the vocabulary or the
# number of training examples changes.
xenc = F.one_hot(xs, num_classes=W.shape[0]).float()
logits = xenc @ W
counts = logits.exp()
probs = counts / counts.sum(1, keepdim=True)
loss = -probs[torch.arange(xs.nelement()), ys].log().mean()

In [None]:
# Print the current loss as a plain Python number.
print(loss.item())

In [123]:
# backward pass
# Drop any gradient left on W from a previous run, then backpropagate `loss`
# so that W.grad holds the gradient of the loss with respect to W.
W.grad = None
loss.backward()

In [124]:
# Gradient-descent step with learning rate 0.1. The in-place update runs under
# torch.no_grad() rather than through `W.data`, which would hide the
# modification from autograd's version tracking.
with torch.no_grad():
    W += -0.1 * W.grad

In [None]:
# NOTE(review): `loss` is not recomputed after the weight update, so this prints
# the value from the most recent forward pass; re-run the forward-pass cell
# first to see the effect of the update.
print(loss.item())

In [None]:
# Per-example breakdown of the loss: for every bigram in the current training
# set, show the predicted distribution, the probability assigned to the true
# next character, and the resulting negative log-likelihood.
# The example count is read from xs instead of the literal 5, so the cell
# works for a training set of any size.
num_examples = xs.nelement()
nlls = torch.zeros(num_examples)
for i in range(num_examples):
    x = xs[i].item()
    y = ys[i].item()
    print("----------------")
    print(f"bigram example {i+1}: {itos[x]}{itos[y]} (index {x}, {y})")
    print("input to the neural net: ", x)
    print("output probabilities from the neural net: ", probs[i])
    print("label (actual next character): ", y)
    p = probs[i, y]
    print("probability assigned to the actual next character: ", p.item())
    logp = torch.log(p)
    print("log likelihood: ", logp.item())
    nll = -logp
    print("negative log likelihood: ", nll.item())
    nlls[i] = nll
print("----------------")
print("average negative log likelihood, i.e. loss =  ", nlls.mean().item())

In [None]:
# create the dataset
# Every word contributes one (current index, next index) pair per adjacent
# character pair, with '.' marking both the start and the end of the word.
xs, ys = [], []
for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print('number of examples: ', num)

# initialize the 'network'
# The weight matrix is sized from the vocabulary (`stoi`) rather than a
# hard-coded literal, so it always has one row/column per symbol.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((len(stoi), len(stoi)), generator=g, requires_grad=True)

In [None]:
# gradient descent
# The one-hot encoding of the inputs and the row index never change between
# iterations, so both are built once up front. The encoding width follows W
# rather than a hard-coded literal.
xenc = F.one_hot(xs, num_classes=W.shape[0]).float() # input to the network: one-hot encoding
rows = torch.arange(num)
for k in range(100):

    # forward pass
    logits = xenc @ W # predict log-counts
    counts = logits.exp() # counts, equivalent to N
    probs = counts / counts.sum(1, keepdim=True) # probabilities for next character
    loss = -probs[rows, ys].log().mean()
    print(loss.item())

    # backward pass
    W.grad = None # set to zero the gradient
    loss.backward()

    # update (learning rate 50); done under no_grad instead of through `W.data`
    # so the in-place change stays visible to autograd's version tracking
    with torch.no_grad():
        W += -50 * W.grad

In [None]:
# finally, sample from the 'neural net' model
g = torch.Generator().manual_seed(2147483647)

for i in range(5):

    out = []
    ix = 0  # every sequence starts from index 0, the '.' token
    while True:

        # Same sampling loop as for the count-based table (`p = P[ix]`), except
        # that the next-character distribution now comes from a forward pass
        # through W. Sampling needs no gradients, so the forward pass runs
        # under no_grad, and the one-hot width follows W rather than a literal.
        with torch.no_grad():
            xenc = F.one_hot(torch.tensor([ix]), num_classes=W.shape[0]).float()
            logits = xenc @ W # predict log-counts
            counts = logits.exp() # counts, equivalent to N
            p = counts / counts.sum(1, keepdim=True) # probabilities for next character

        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
        out.append(itos[ix])
        if ix == 0:
            break
    print(''.join(out))