In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
import torch.nn.functional as F

In [None]:
# Read the corpus: one name per line. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open("names.txt") as names_file:
    words = names_file.read().splitlines()

In [None]:
###Min and max word length
# Length of every word, then the two extremes.
lengths = list(map(len, words))
minlength, maxlength = min(lengths), max(lengths)
print(f"{minlength=}, {maxlength=}")

# Every word sitting at either extreme, in corpus order.
minwords = [w for w, n in zip(words, lengths) if n == minlength]
maxwords = [w for w, n in zip(words, lengths) if n == maxlength]
print(f"MinLength words:{minwords}")
print(f"MaxLength words:{maxwords}")

In [None]:
# Vocabulary: every distinct character found in the corpus, sorted, with '.'
# placed at index 0 (it is used as the start/end-of-word marker below).
charlist = ['.', *sorted({ch for w in words for ch in w})]

# Index <-> character lookup tables derived from the vocabulary order.
charfromnum = dict(enumerate(charlist))
numfromchar = {ch: k for k, ch in charfromnum.items()}

# Bigram count table: f[i][j] is how many times character j directly follows
# character i, with every word wrapped in '.' on both sides.
f = [[0 for _ in charlist] for _ in charlist]

for w in words:
    padded = '.' + w + '.'
    for pos in range(len(padded) - 1):
        first = numfromchar[padded[pos]]
        second = numfromchar[padded[pos + 1]]
        f[first][second] += 1


# Heat map of the bigram counts with each cell annotated by its bigram and count.
# imshow draws f[row][col] at x=col, y=row, so the annotations must be placed at
# (col, row); passing (row, col) puts every label on the transposed cell.
plt.figure(figsize=(16, 16));
plt.imshow(f, cmap="Blues");
for row in range(len(f)):
    for col in range(len(f[row])):
        label = charfromnum[row] + charfromnum[col]
        # Bigram text sits just above the cell centre, its count just below.
        plt.text(col, row, label, ha="center", va="bottom", color="gray")
        plt.text(col, row, str(f[row][col]), ha="center", va="top", color="gray")
plt.axis("off");

In [None]:
###Test the model - generate some random words

# Turn the raw counts into per-row probability distributions with additive
# smoothing: `sentinel` is added to every count, so no transition ends up with
# probability zero.
sentinel = 1.0
g = []
for counts_row in f:
    rowsum = sum(counts_row) + sentinel * len(counts_row)
    g.append([(count + sentinel) / rowsum for count in counts_row])

def generateRandomName():
    """Sample one name from the smoothed bigram table `g`.

    Starting from the '.' marker (index 0), repeatedly draws the next character
    index from the probability row of the current one and stops as soon as
    index 0 is drawn again. The marker itself is never appended, so the result
    is an empty string when the marker is drawn on the very first step.

    Returns:
        str: the sampled characters, in the order they were drawn.
    """
    idx = 0
    chars = []
    while True:
        row = g[idx]
        # random.choices normalises the weights itself. The previous manual scan
        # over a cumulative sum left `idx` unchanged (repeating the last
        # character) whenever rounding made the final cumulative value fall
        # below the random draw.
        idx = random.choices(range(len(row)), weights=row)[0]
        if idx == 0:
            break
        chars.append(charfromnum[idx])

    return "".join(chars)

# Draw ten names from the bigram model, one per line.
for _ in range(10):
    print(generateRandomName())


In [None]:

def wordnll(word:str) -> float:
    """Average negative log-likelihood per bigram of a single word.

    The word is wrapped in '.' on both sides and each consecutive character pair
    is looked up in the probability table `g` via `numfromchar`.

    Args:
        word: the word to score; every character must be a key of `numfromchar`
            (an unknown character raises KeyError).

    Returns:
        The sum of -log(probability) over all bigrams divided by their number.
    """
    padded = '.' + word + '.'
    npairs = len(padded) - 1
    total = 0.0
    for pos in range(npairs):
        prev_idx = numfromchar[padded[pos]]
        next_idx = numfromchar[padded[pos + 1]]
        total -= np.log(g[prev_idx][next_idx])
    return total / npairs

# Per-word average NLL for the first ten corpus words, then for one extra name.
print(list(map(wordnll, words[:10])))
print(wordnll("dionysios"))

def allwordsnll(words:list) -> float:
    """Average negative log-likelihood per bigram over a list of words.

    Every word is wrapped in '.' on both sides; all bigrams of all words are
    pooled, so the result is normalised by the total bigram count rather than
    by the number of words.

    Args:
        words: words to score; every character must be a key of `numfromchar`.

    Returns:
        The sum of -log(probability) over all pooled bigrams divided by their
        count. An empty list raises ZeroDivisionError.
    """
    total = 0.0
    npairs = 0
    for word in words:
        padded = '.' + word + '.'
        for prev_ch, next_ch in zip(padded, padded[1:]):
            total -= np.log(g[numfromchar[prev_ch]][numfromchar[next_ch]])
            npairs += 1
    return total / npairs

# Average negative log-likelihood per bigram over the whole corpus.
print(allwordsnll(words))

In [None]:
###Neural Network Implementation with PyTorch

nc = len(charlist)

# One (current character, next character) index pair per bigram, with every
# word wrapped in '.' on both sides.
pairs = [
    (numfromchar[ch1], numfromchar[ch2])
    for w in words
    for ch1, ch2 in zip('.' + w + '.', w + '.')
]

numtrain = len(pairs)
# Inputs and targets as index tensors.
xs = torch.tensor([first for first, _ in pairs])
ys = torch.tensor([second for _, second in pairs])

# One-hot encode inputs and targets as float tensors with one column per
# vocabulary entry.
xenc, yenc = (F.one_hot(idx, num_classes=nc).float() for idx in (xs, ys))
print(xenc.shape, yenc.shape)
# Preview of the first twenty encoded inputs.
plt.imshow(xenc[:20]);

In [None]:
# Single linear layer with random weights: logits -> exponentiate -> normalise
# each row so it sums to one.
W = torch.randn(nc, nc)
logits = torch.matmul(xenc, W)
counts = torch.exp(logits)
probs = counts / counts.sum(dim=1, keepdim=True)

In [None]:
# Seeded initialisation so the run is reproducible. The generator is bound to
# `gen` rather than `g`: `g` already names the bigram probability table that
# generateRandomName / wordnll / allwordsnll read at call time, and rebinding it
# here would break those functions for the rest of the session.
gen = torch.Generator().manual_seed(2147483647)
# Size the weights from the vocabulary instead of hard-coding 27.
W = torch.randn((nc, nc), generator=gen, requires_grad=True)

# Forward pass: one-hot inputs -> logits -> exponentiate -> row-normalise.
xenc = F.one_hot(xs, num_classes=nc).float()
logits = xenc @ W
counts = logits.exp()
probs = counts / counts.sum(1, keepdims=True)

# Negative log-likelihood of the correct next character for the first five pairs.
nlls = torch.zeros(5)
for k in range(5):
    y = ys[k].item()
    nlls[k] = -torch.log(probs[k, y])
    print(f"{k=} nll={nlls[k].item()}")

print(nlls.mean().item())
# The same quantity, vectorised over the whole training set.
loss = -probs[torch.arange(numtrain), ys].log().mean()
print(loss)



In [None]:
# Plain gradient descent on the average negative log-likelihood.
num_steps = 1000
log_every = 50
learning_rate = 1.0

# The one-hot inputs never change between steps, so encode them once, sized by
# the vocabulary rather than a hard-coded 27.
xenc = F.one_hot(xs, num_classes=nc).float()

for k in range(num_steps):
    # Forward pass: logits -> exponentiate -> row-normalise -> mean NLL.
    logits = xenc @ W
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdims=True)
    loss = -probs[torch.arange(numtrain), ys].log().mean()
    if k % log_every == 0: print(loss)

    # Backward pass, starting from a cleared gradient.
    W.grad = None
    loss.backward()

    # Parameter update performed outside autograd tracking.
    with torch.no_grad():
        W -= learning_rate * W.grad