In [1]:
import numpy as np
import torch
import torch.nn.functional as F
import random

In [2]:
# opening the file in read mode 
my_file = open("../data/names.txt", "r") 
# reading the file 
data = my_file.read()
# split the names into a list
name_list = data.split('\n')
name_list = ["."+x for x in name_list]

In [3]:
chartoix = {char: ix for ix, char in enumerate(sorted(set("".join(name_list))))}
ixtochar = {ix: char for ix, char in enumerate(sorted(set("".join(name_list))))}

In [4]:
def build_x_y(name_list):
    # Create a dataset with, for each example, all possible X and Y combinations we can extract from it
    block_size = 3
    y_list = []
    x_list = []
    for name in name_list:
        for target_ix, target_letter in enumerate(name):
            previous_letters = name[max(-(block_size)+target_ix,0):target_ix] # This will extract the previous 3 letters
            num_pads = block_size - len(previous_letters)
            previous_letters = "."*num_pads + previous_letters
            #print(previous_letters + "-->" + target_letter)
            y_list.append(chartoix[target_letter])
            x_list.append([chartoix[input_letter] for input_letter in previous_letters])
            
    X = torch.tensor(x_list) # Tensor with 3 dimensional list, input letter index
    Y = torch.tensor(y_list) # Tensor with 1 dimensional output
    return X,Y

In [5]:
ix_80_pct = int(0.8*len(name_list))
ix_90_pct = int(0.9*len(name_list))

In [6]:
X_train, Y_train = build_x_y(name_list[:ix_80_pct])
X_val, Y_val= build_x_y(name_list[ix_80_pct:ix_90_pct])
X_test, Y_test = build_x_y(name_list[ix_90_pct:])

In [7]:
X_train.shape, Y_train.shape, X_val.shape, Y_val.shape, X_test.shape, Y_test.shape

(torch.Size([182778, 3]),
 torch.Size([182778]),
 torch.Size([22633, 3]),
 torch.Size([22633]),
 torch.Size([22735, 3]),
 torch.Size([22735]))

In [17]:
###### Parameters ########
g = torch.Generator().manual_seed(2147483647)
C = torch.randn(27,10)
W1 = torch.randn(30,300)
b1 = torch.randn(300)
W2 = torch.randn(300,27)
b2 = torch.randn(27)
parameters = [C, W1, b1, W2, b2]
for p in parameters:
    p.requires_grad = True

In [18]:
losses = []
steps = []
for i in range(200000):
    ###### Minibatch #######
    minibatch_size=32
    minibatch_ix = torch.randint(0,len(X_train),(minibatch_size,))
    
    ###### Forward pass ########
    embs = C[X_train[minibatch_ix]]
    h = torch.tanh(embs.view(-1,30) @ W1 + b1) #embs.view will reshape
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits,Y_train[minibatch_ix]) #This makes forward/backward pass more efficient and makes things more well behaved numerically for large vals.

    ###### Backward pass ########
    for p in parameters:
        p.grad = None
    loss.backward()
    lr = 0.1 if i < 100000 else 0.01
    for p in parameters:
        p.data += -lr*p.grad
    steps.append(i)
    losses.append(loss.item())

In [19]:
loss.item()

1.9216910600662231

In [20]:
# validation loss
embs_val = C[X_val]
h_val = torch.tanh(embs_val.view(-1,30) @ W1 + b1)
logits_val = h_val @ W2 + b2
val_loss = F.cross_entropy(logits_val,Y_val) #This makes forward/backward pass more efficient and makes things more well behaved numerically for large vals.
print(val_loss.item())

2.333253860473633
