In [66]:
import torch
import torch.nn.functional as F

words = open('names.txt', 'r').read().splitlines()
len(words)

32033

In [67]:
chars = sorted(list(set(''.join(words))))
str_to_int = {s: i+1 for i, s in enumerate(chars)}
str_to_int['.'] = 0
int_to_str = {i: s for s, i in str_to_int.items()}

In [68]:
block_size = 3 # Context length - how many characters to input
vocab_size = len(chars) # 27
X, Y = [], []

for w in words[:3]:
    context = [0] * block_size
    
    for char in w + '.':
        ix = str_to_int[char]

        X.append(context)
        Y.append(ix)

        context = context[1:] + [ix]

X = torch.tensor(X)
Y = torch.tensor(Y)

In [69]:
emb_size = 2 # Lenght of tensor that represents each character
hidden_layer_neurons = 100

C = torch.randn((vocab_size, emb_size), requires_grad=True)
W1 = torch.randn((emb_size*block_size, hidden_layer_neurons), requires_grad=True)
b1 = torch.randn(hidden_layer_neurons, requires_grad=True)
W2 = torch.randn((hidden_layer_neurons, vocab_size), requires_grad=True)
b2 = torch.randn(vocab_size, requires_grad=True)
parameters = [C, W1, b1, W2, b2]

f'Num parameters: {sum(p.nelement() for p in parameters)}'

'Num parameters: 3378'

In [70]:
lr = 0.1
steps = 100

for i in range(steps):
    # Forward pass
    emb = C[X] # (X.shape[0], block_size, emb_size)
    h = torch.tanh(emb.view(-1, emb_size*block_size) @ W1 + b1) # -1 means infer - (X.shape[0], block_size*emb_size)
    logits = h @ W2 + b2 # (X.shape[0], vocab_size)

    # counts = logits.exp()
    # prob = counts / counts.sum(1, keepdim=True)
    # loss = -prob[torch.arange(16), Y].log().mean()

    loss = F.cross_entropy(logits, Y) # Way more efficient and same result as 3 previous lines

    # Backward pass
    for p in parameters:
        p.grad = None
    loss.backward()

    # Update
    for p in parameters:
        p.data += -lr * p.grad

    if i % 10 == 0:
        print(f'Step {i} with loss {loss.item()}')

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn