# Building Makemore Part II: MLP
Video lecture: https://www.youtube.com/watch?v=TCH_1BHY58I

In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Read in all words, one per line.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open("names.txt", 'r') as f:
    words = f.read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [4]:
# Number of words read from names.txt
len(words)

32033

In [18]:
# Character vocabulary and the two lookup tables between characters and ids.
# Letters are numbered from 1 in sorted order; id 0 is reserved for '.'.
chars = sorted(set("".join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0  # added last, so it also appears last when itos is printed
itos = dict((idx, ch) for ch, idx in stoi.items())
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


Build the Dataset

In [37]:
# Building Dataset
# Every example pairs a window of `block_size` preceding character ids (X)
# with the id of the character that comes next (Y).
block_size = 3
X = []
Y = []

for w in words[:5]:  # only the first five words are used here

    print(f"\n{w}")
    context = block_size * [0]  # initial window is all id 0, i.e. '...'
    for ch in f"{w}.":  # trailing '.' makes the end of the word a target too
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        window_text = "".join(itos[i] for i in context)
        print(window_text, '⇨', itos[ix], context, "⇨", ix)
        # Slide the window: drop the oldest id, append the newest. A new list
        # is built each time, so the windows already stored in X are untouched.
        context = [*context[1:], ix]

X, Y = torch.tensor(X), torch.tensor(Y)


emma
... ⇨ e [0, 0, 0] ⇨ 5
..e ⇨ m [0, 0, 5] ⇨ 13
.em ⇨ m [0, 5, 13] ⇨ 13
emm ⇨ a [5, 13, 13] ⇨ 1
mma ⇨ . [13, 13, 1] ⇨ 0

olivia
... ⇨ o [0, 0, 0] ⇨ 15
..o ⇨ l [0, 0, 15] ⇨ 12
.ol ⇨ i [0, 15, 12] ⇨ 9
oli ⇨ v [15, 12, 9] ⇨ 22
liv ⇨ i [12, 9, 22] ⇨ 9
ivi ⇨ a [9, 22, 9] ⇨ 1
via ⇨ . [22, 9, 1] ⇨ 0

ava
... ⇨ a [0, 0, 0] ⇨ 1
..a ⇨ v [0, 0, 1] ⇨ 22
.av ⇨ a [0, 1, 22] ⇨ 1
ava ⇨ . [1, 22, 1] ⇨ 0

isabella
... ⇨ i [0, 0, 0] ⇨ 9
..i ⇨ s [0, 0, 9] ⇨ 19
.is ⇨ a [0, 9, 19] ⇨ 1
isa ⇨ b [9, 19, 1] ⇨ 2
sab ⇨ e [19, 1, 2] ⇨ 5
abe ⇨ l [1, 2, 5] ⇨ 12
bel ⇨ l [2, 5, 12] ⇨ 12
ell ⇨ a [5, 12, 12] ⇨ 1
lla ⇨ . [12, 12, 1] ⇨ 0

sophia
... ⇨ s [0, 0, 0] ⇨ 19
..s ⇨ o [0, 0, 19] ⇨ 15
.so ⇨ p [0, 19, 15] ⇨ 16
sop ⇨ h [19, 15, 16] ⇨ 8
oph ⇨ i [15, 16, 8] ⇨ 9
phi ⇨ a [16, 8, 9] ⇨ 1
hia ⇨ . [8, 9, 1] ⇨ 0


In [26]:
# First five context windows; each row holds block_size character ids
X[:5]

tensor([[ 0,  0,  0],
        [ 0,  0,  5],
        [ 0,  5, 13],
        [ 5, 13, 13],
        [13, 13,  1]])

In [28]:
# First five targets: the id of the character that follows each context window
Y[:5]

tensor([ 5, 13, 13,  1,  0])

In [29]:
# Sanity check: shapes and dtypes of the inputs (X) and targets (Y)
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

Creating the Lookup Table

In [38]:
# Lookup table with one randomly initialised 2-element row per character id
# (27 rows: ids 0..26).
C = torch.randn(27, 2)
C

tensor([[ 0.3650, -0.3895],
        [-0.9772, -1.4756],
        [ 0.4431, -0.1886],
        [-0.9892, -0.1806],
        [ 0.1006, -0.7739],
        [-0.4891, -0.5384],
        [ 0.6448, -0.0225],
        [ 0.5524,  0.4478],
        [-0.2687, -1.4652],
        [ 0.0409, -1.1418],
        [ 0.9349, -0.3741],
        [-1.4909, -0.8147],
        [ 1.4948,  0.2520],
        [ 0.9366,  1.9814],
        [ 1.0121, -0.6123],
        [-0.6807,  0.4954],
        [ 0.6293, -0.3215],
        [ 1.1528,  0.5102],
        [ 1.0275,  0.2026],
        [ 0.7935, -0.8536],
        [-1.4935,  1.1331],
        [-0.6322,  1.2386],
        [-1.0048, -1.0912],
        [ 1.6902,  0.3304],
        [-1.5179, -1.2812],
        [ 0.6197, -0.5310],
        [ 0.7618,  0.4061]])

In [39]:
# Plain integer indexing: row 5 of the lookup table
C[5]

tensor([-0.4891, -0.5384])

In [41]:
# One-hot vector of length 27 with a single 1 at index 5
F.one_hot(torch.tensor(5), num_classes=27)

tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0])

In [43]:
# The one-hot vector is cast to float before the matrix product with C.
# Multiplying it by C picks out row 5, so the result matches C[5].
F.one_hot(torch.tensor(5), num_classes=27).float() @ C

tensor([-0.4891, -0.5384])

Video reference (timestamp 12:41): https://youtu.be/TCH_1BHY58I?si=a4cpoBVyRsDQpdkn&t=761