In [190]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import random
words = open("names.txt", 'r').read().splitlines()

In [254]:
N = torch.zeros((27,27), dtype=torch.int32)
chars = sorted(list(set(''.join(words))))
c_to_i = {c:i+1 for i,c in enumerate(chars)}
c_to_i['.'] = 0
i_to_c = {i+1:c for i,c in enumerate(chars)}
i_to_c[0] = '.'

In [255]:
def generate_dataset(word_list):
    block_size = 3
    X, Y = [], []

    for word in word_list:
        vector = [0] * block_size
        for c in word + '.':
            val = c_to_i[c]
            X.append(vector)
            Y.append(val)
            vector = vector[1:] + [val]

    X = torch.tensor(X)
    Y = torch.tensor(Y)
    
    return X, Y

In [256]:
random.shuffle(words)

n1 = int(0.8 * len(words))
n2 = int(0.9 * len(words))
X_train, Y_train = generate_dataset(words[:n1])
X_val, Y_val = generate_dataset(words[n1:n2])
X_test, Y_test = generate_dataset(words[n2:])

In [257]:
C = torch.randn([27,10])

W1 = torch.randn([30,200])
b1 = torch.randn(200)

W2 = torch.randn((200,27))
b2 = torch.randn(27)

parameters = [C, W1, b1, W2, b2]
for p in parameters:
    p.requires_grad = True

In [258]:
#lrs = 10**torch.linspace(-3,0,1000)
#lrs.shape

In [277]:
loss_t = []

for i in range(60000):
    x_inds = torch.randint(0, X_train.shape[0], (32,))
    
    raw_emb = C[X[x_inds]]
    emb = raw_emb.view(raw_emb.shape[0],30)
    h1 = torch.tanh(emb @ W1 + b1)
    logits = h1 @ W2 + b2
    loss = F.cross_entropy(logits, Y_train[x_inds])
    
    #loss_t.append(loss.item())
    
    for p in parameters:
        p.grad = None
        
    loss.backward()
    
    for p in parameters:
        #p.data += -lrs[i] * p.grad
        p.data += -0.01 * p.grad

In [278]:
x_inds = np.arange(X_train.shape[0])
raw_emb = C[X[x_inds]]
emb = raw_emb.view(raw_emb.shape[0],30)
h1 = torch.tanh(emb @ W1 + b1)
logits = h1 @ W2 + b2
loss = F.cross_entropy(logits, Y_train[x_inds])
loss

tensor(2.8255, grad_fn=<NllLossBackward0>)

In [279]:
#plt.plot(lrs, loss_t)
#plt.plot(np.arange(10000),loss_t)

In [280]:
x_inds = np.arange(X_val.shape[0])
raw_emb = C[X[x_inds]]
emb = raw_emb.view(raw_emb.shape[0],30)
h1 = torch.tanh(emb @ W1 + b1)
logits = h1 @ W2 + b2
loss = F.cross_entropy(logits, Y_val[x_inds])
loss

tensor(2.8457, grad_fn=<NllLossBackward0>)