In [29]:
# Load the training corpus: one name per line of names.txt.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving an open handle for the garbage collector to clean up.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [46]:
# Vocabulary: every distinct character that occurs anywhere in the names, sorted.
word = sorted(set(''.join(words)))

# Character -> integer index. The characters found in the data take indices
# 1..len(word); index 0 is reserved for the '.' token.
stoi = dict(zip(word, range(1, len(word) + 1)))
stoi['.'] = 0

# Inverse table: integer index -> character.
itos = {idx: ch for ch, idx in stoi.items()}

In [47]:
#Dataset creation
import torch
block_size = 3  # context length: how many preceding characters the model sees per prediction
X, Y = [], []

for name in words:
    # Each name starts from a context made entirely of index 0 (the '.' token).
    window = [0] * block_size
    # Appending '.' makes the end of the name a prediction target as well.
    for char in name + '.':
        target = stoi[char]
        X.append(window)
        Y.append(target)
        # Slide the context: drop the oldest index, append the one just seen.
        # A new list is built each step, so rows already stored in X are not mutated.
        window = [*window[1:], target]

# Convert the Python lists to tensors: one row of X per training example,
# with the matching next-character index in Y.
X = torch.tensor(X)
Y = torch.tensor(Y)

# print(X.shape)
# print(Y.shape)
    

In [48]:
# C is the lookup table (also called the embedding table):
# row i holds the embedding vector of the character with index i.
emb_dim = 2  # width of each character embedding
# One row per entry in stoi, so the table always covers every index that can
# appear in X (27 rows for a vocabulary of 26 letters plus '.').
C = torch.rand((len(stoi), emb_dim))

# Indexing C with the integer tensor X embeds every context character at once:
# emb has the shape of X with an extra trailing axis of size emb_dim.
emb = C[X]

# Flatten each example's context embeddings into a single row. Passing -1 lets
# PyTorch infer the row width (context length * emb_dim) instead of hard-coding
# 6, so the cell keeps working if block_size or emb_dim is changed.
# The result is 2-D: (number of examples, context length * emb_dim).
it = emb.view(emb.shape[0], -1)
print(it)

tensor([[0.9266, 0.5524, 0.9266, 0.5524, 0.9266, 0.5524],
        [0.9266, 0.5524, 0.9266, 0.5524, 0.3108, 0.5816],
        [0.9266, 0.5524, 0.3108, 0.5816, 0.7910, 0.7240],
        ...,
        [0.2742, 0.4753, 0.2742, 0.4753, 0.9798, 0.2702],
        [0.2742, 0.4753, 0.9798, 0.2702, 0.2742, 0.4753],
        [0.9798, 0.2702, 0.2742, 0.4753, 0.5588, 0.4923]])


In [49]:
# Hidden layer: every flattened 6-value context row feeds 100 neurons.
# W1 holds the weights (6 inputs x 100 neurons), b1 one bias per neuron.
W1 = torch.randn(6, 100)
b1 = torch.randn(100)

# Weighted sum plus bias (b1 broadcasts across the rows), squashed by tanh.
h = torch.matmul(it, W1).add(b1).tanh()
h.shape

torch.Size([228146, 100])

In [50]:
# Output layer parameters: 100 hidden activations -> 27 scores per example.
W2 = torch.randn((100, 27))
b2 = torch.randn((27,))

# Raw output scores (logits) of the network, one row per example.
y = torch.matmul(h, W2) + b2

In [51]:
# Compare the network's output scores y with the true next-character indices Y.
# cross_entropy takes raw scores (logits) and, with its default reduction,
# returns the mean loss over all examples.
loss = torch.nn.functional.cross_entropy(input=y, target=Y)
loss

tensor(16.0003)

In [52]:
# Minibatching: draw 32 random row indices into X. randint samples with
# replacement, so an index may repeat within one batch.
num_examples = X.shape[0]
m = torch.randint(low=0, high=num_examples, size=(32,))
m

tensor([ 32544, 113328, 106035, 156777,  82998, 103543,  47505, 162331,  67442,
         46742,  96667, 197993, 147978, 113198, 208979, 167863,  84419, 111198,
        118785, 192752,  60892, 120028, 100259, 136311, 185748, 144211,  51788,
        195850, 184413, 159039, 140130, 227886])

In [63]:
# Putting it all together. Each training step does:
#   1. sample a minibatch, 2. forward pass, 3. compute the loss,
#   4. backward pass, 5. gradient-descent update of every parameter.
# The steps are repeated a fixed number of times.
parameters = [C, W1, W2, b1, b2]
for p in parameters:
    p.requires_grad = True

n_steps = 100     # number of gradient-descent updates
batch_size = 32   # examples per minibatch
lr = 0.1          # learning rate (step size)

for _ in range(n_steps):

    # Minibatch: random row indices into the dataset (sampled with replacement)
    inx = torch.randint(0, X.shape[0], (batch_size,))

    # Forward pass. view(..., -1) infers the flattened context width rather
    # than hard-coding 6, so it follows the shapes of X and C.
    emb = C[X[inx]]
    h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)
    y = h @ W2 + b2

    # Loss on this minibatch only
    loss = torch.nn.functional.cross_entropy(y, Y[inx])
    #print(loss.item())

    # Backward pass: clear stale gradients first, otherwise backward() would
    # accumulate into the gradients left over from the previous step.
    for p in parameters:
        p.grad = None
    loss.backward()

    # Parameter update. The in-place step runs under no_grad() so autograd
    # does not record it; this replaces mutating p.data, which bypasses
    # autograd's tracking of in-place changes.
    with torch.no_grad():
        for p in parameters:
            p += -lr * p.grad

# This is the loss of the final minibatch only, so it is a noisy estimate of
# the loss over the full dataset.
print(loss.item())

2.684041976928711
