In [1]:
import random

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

In [67]:
# Load the corpus: one name per line. The context manager closes the file
# handle as soon as the contents have been read.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [3]:
# Number of names in the corpus
len(words)

32033

In [4]:
# Build the character vocabulary and the lookup tables to / from integer ids
chars = sorted(set(''.join(words)))  # every distinct character in the corpus, sorted
stoi = dict(zip(chars, range(1, len(chars) + 1)))  # character -> id, starting at 1 ...
stoi['.'] = 0  # ... because id 0 is reserved for the '.' boundary token
itos = dict((idx, ch) for ch, idx in stoi.items())  # inverse table: id -> character

In [5]:
# Build the full dataset of (context, next-character) pairs

block_size = 3  # context length: how many preceding characters predict the next one
X, Y = [], []
for w in words:
    context = [0] * block_size  # every word starts from an all-zero (all '.') context
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        context = [*context[1:], ix]  # slide the window: drop the oldest id, append the newest

X = torch.tensor(X)
Y = torch.tensor(Y)

In [83]:
# One row of X (a block_size-long context) for every target in Y
X.shape, Y.shape

(torch.Size([228146, 3]), torch.Size([228146]))

In [85]:
# Build DataSet

def build_dataset(words, block_size=3, char_to_ix=None):
    """Turn a list of words into (context, next-character) training tensors.

    Every word is terminated with '.', and each of its characters (terminator
    included) becomes one target whose input is the ``block_size`` character
    ids that precede it. Positions before the start of a word are padded
    with id 0.

    Args:
        words: sequence of strings to encode.
        block_size: context length, i.e. how many previous characters are used
            to predict the next one. Defaults to 3, the value that used to be
            hard-coded in this function.
        char_to_ix: mapping from character to integer id. When omitted, the
            notebook-level ``stoi`` table is used.

    Returns:
        A tuple ``(X, Y)``: ``X`` is an int64 tensor of shape
        ``(N, block_size)`` holding the contexts and ``Y`` is an int64 tensor
        of shape ``(N,)`` holding the target ids.

    Raises:
        ValueError: if ``block_size`` is smaller than 1.
        KeyError: if a character is missing from the mapping.
    """
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    mapping = stoi if char_to_ix is None else char_to_ix

    contexts, targets = [], []
    for w in words:
        context = [0] * block_size
        for ch in w + '.':
            ix = mapping[ch]
            contexts.append(context)
            targets.append(ix)
            context = context[1:] + [ix]  # crop and append

    # Explicit dtype and shape keep the result well-formed even for empty input.
    X = torch.tensor(contexts, dtype=torch.long).reshape(len(contexts), block_size)
    Y = torch.tensor(targets, dtype=torch.long)
    return X, Y

# Training / dev (validation) / test split: 80% / 10% / 10% of the names.
# Shuffle a copy rather than `words` itself so that re-running this cell on the
# same `words` reproduces the same split instead of reshuffling an
# already-shuffled list.
words_shuffled = list(words)
random.seed(42)
random.shuffle(words_shuffled)
n1 = int(0.8 * len(words_shuffled))
n2 = int(0.9 * len(words_shuffled))

Xtr, Ytr = build_dataset(words_shuffled[:n1])      # training split
Xdev, Ydev = build_dataset(words_shuffled[n1:n2])  # dev / validation split
Xte, Yte = build_dataset(words_shuffled[n2:])      # test split

In [77]:
dim = 2  # embedding dimensionality: each character id maps to a `dim`-vector
C = torch.randn((27, dim))  # embedding lookup table with 27 rows, one per character id

In [79]:
# Look up the embedding vector (row of C) for character id 5
C[5]

tensor([-0.0656, -0.1687])

In [195]:
# Index the table with the whole context tensor: every integer id in X is replaced by its row of C
emb = C[X]
# NOTE(review): the recorded (32, 3, 2) output looks like it came from a run where X held only 32 examples — confirm after a fresh run
emb.shape

torch.Size([32, 3, 2])

In [197]:
# Hidden layer parameters: 6 inputs (the flattened embeddings of one context) -> 100 units
W1 = torch.randn(6, 100)
b1 = torch.randn((100,))

In [171]:
# Alternative way to flatten the embeddings (concatenate instead of view): torch.cat(torch.unbind(emb, 1), 1).shape

In [203]:
# Flatten each example's embeddings into a single 6-vector, then apply the tanh hidden layer
h = (emb.view(-1, 6) @ W1 + b1).tanh()

In [205]:
# Hidden activations: one row of 100 values per example
h.shape

torch.Size([32, 100])

In [207]:
# Output layer parameters: 100 hidden units -> 27 logits
W2 = torch.randn(100, 27)
b2 = torch.randn((27,))

In [209]:
# Output layer: one row of 27 unnormalised scores per example
logits = torch.matmul(h, W2) + b2

In [211]:
# One row of 27 logits per example
logits.shape

torch.Size([32, 27])

In [213]:
# Softmax by hand: exponentiate the logits into positive "counts",
# then normalise each row so that it sums to 1
counts = torch.exp(logits)
prob = counts / counts.sum(dim=1, keepdim=True)

In [223]:
# Mean negative log-likelihood of the correct next character.
# Rows are indexed by the actual number of targets rather than a hard-coded 32,
# so the cell works for however many examples X / Y currently hold.
loss = -prob[torch.arange(Y.shape[0]), Y].log().mean()

In [195]:
# (Re)initialise the model with a 10-dim embedding and a 200-unit hidden layer.
# The seeded generator is passed to every randn call so that initialisation is
# reproducible; previously `g` was created but never used.
g = torch.Generator().manual_seed(2147483647)
C = torch.randn((27, 10), generator=g)    # embedding table: 27 ids -> 10-dim vectors
W1 = torch.randn((30, 200), generator=g)  # hidden layer: 30 inputs -> 200 units
b1 = torch.randn(200, generator=g)
W2 = torch.randn((200, 27), generator=g)  # output layer: 200 units -> 27 logits
b2 = torch.randn(27, generator=g)
parameters = [C, W1, b1, W2, b2]
sum(p.nelement() for p in parameters)  # total number of scalar parameters

11897

In [197]:
# Turn on gradient tracking for every parameter tensor
for p in parameters:
    p.requires_grad_(True)

In [199]:
# Candidate learning rates, evenly spaced in log10 space
lre = torch.linspace(start=-3, end=0, steps=1000)  # exponents from -3 to 0
lrs = torch.pow(10, lre)  # the learning rates themselves: 10 ** exponent
lrs

tensor([0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0011,
        0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011,
        0.0011, 0.0011, 0.0011, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012,
        0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0013, 0.0013, 0.0013,
        0.0013, 0.0013, 0.0013, 0.0013, 0.0013, 0.0013, 0.0013, 0.0013, 0.0014,
        0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014,
        0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015,
        0.0015, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016,
        0.0016, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017,
        0.0018, 0.0018, 0.0018, 0.0018, 0.0018, 0.0018, 0.0018, 0.0018, 0.0019,
        0.0019, 0.0019, 0.0019, 0.0019, 0.0019, 0.0019, 0.0019, 0.0020, 0.0020,
        0.0020, 0.0020, 0.0020, 0.0020, 0.0020, 0.0021, 0.0021, 0.0021, 0.0021,
        0.0021, 0.0021, 0.0021, 0.0022, 

In [201]:
# Bookkeeping filled in by the training loop:
# learning-rate exponents tried, log10 losses, and step indices
lri, lossi, stepi = [], [], []

In [249]:
# Mini-batch SGD on the training split.
batch_size = 32
learning_rate = 0.01
step_offset = len(stepi)  # keeps the step axis increasing if this cell is run again

for i in range(50000):
    # Forward pass on a random mini-batch
    ix = torch.randint(0, Xtr.shape[0], (batch_size,))
    emb = C[Xtr[ix]]
    h = torch.tanh(emb.view(-1, 30) @ W1 + b1)
    logits = h @ W2 + b2
    # cross_entropy fuses the softmax and the negative log-likelihood
    loss = F.cross_entropy(logits, Ytr[ix])

    # Backward pass
    for p in parameters:
        p.grad = None
    loss.backward()

    # Parameter update; no_grad keeps the in-place step out of the autograd graph.
    # (For the learning-rate sweep, use lrs[i] as the step size and record lre[i] in lri.)
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

    # Track stats
    lossi.append(loss.log10().item())
    stepi.append(step_offset + i)

In [None]:
# plt.plot(lri, lossi)

[<matplotlib.lines.Line2D at 0x1acd0640f50>]

In [251]:
# Loss on the dev / validation split. no_grad avoids building an autograd graph
# over the whole split, since no backward pass follows.
with torch.no_grad():
    emb = C[Xdev]
    h = torch.tanh(emb.view(-1, 30) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ydev)
print(loss.item())

2.1995961666107178


In [253]:
# Loss on the test split. no_grad avoids building an autograd graph over the
# whole split, since no backward pass follows.
with torch.no_grad():
    emb = C[Xte]
    h = torch.tanh(emb.view(-1, 30) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Yte)
print(loss.item())

2.197334051132202


In [255]:
# Sample 20 names from the trained model, one character at a time.
# Relies on `block_size` from the dataset-building cell.
g = torch.Generator().manual_seed(2147483647 + 10)

with torch.no_grad():  # inference only: no autograd graph needed
    for _ in range(20):
        out = []
        context = [0] * block_size  # start from the all-zero (all '.') context
        while True:
            emb = C[torch.tensor([context])]
            h = torch.tanh(emb.view(1, -1) @ W1 + b1)
            logits = h @ W2 + b2
            probs = F.softmax(logits, dim=1)
            ix = torch.multinomial(probs, num_samples=1, generator=g).item()
            context = context[1:] + [ix]  # slide the context window
            out.append(ix)
            if ix == 0:  # id 0 is '.', which ends the name
                break
        print(''.join(itos[i] for i in out))

mria.
kayanniee.
med.
ryla.
remurstendramgraqmie.
selin.
shy.
jen.
eden.
esmanarleitziohna.
amn.
shretzishivies.
kin.
renelinn.
berlyn.
breygh.
ryyah.
fael.
yumajavonny.
azhia.
