In [42]:
words = open('names.txt', 'r').read().splitlines()

In [43]:
#Define the character set 
word = sorted(list(set(''.join(words))))

#Using the word which is a sorted list, let's map each character to a number
stoi = {s:i+1 for i,s in enumerate(word)}
stoi['.'] = 0

#Now the vice versa that is mapping a integer to the letter
itos = {i:s for s, i in stoi.items() }
#itos

In [44]:
#Dataset creation
import torch
block_size = 3 #This is the context length, it specifies the number of words/characters you are giving to the model

def build_dataset():
    X = []
    Y = []

    for w in words:
        content = [0] * block_size
        for ch in w + '.':
            inx = stoi[ch]
            X.append(content)
            Y.append(inx)
            content = content[1:] + [inx]

    X = torch.tensor(X)
    Y = torch.tensor(Y)
    return X,Y

n1 = int(0.8 * len(words))
n2 = int(0.9 * len(words))
import random
random.seed(42)
random.shuffle(words)
X, Y = build_dataset()
Xtr, Ytr = X[:n1], Y[:n1]
Xdev, Ydev = X[n1: n2], Y[n1: n2]
Xval, Yval = X[n2:], Y[n2:]

print(Xtr.shape)
print(Ytr.shape)
    

torch.Size([25626, 3])
torch.Size([25626])


In [45]:
#C here is the look up table (also called the embeddings table)
#Each row of C has the embeddings required for a character
C = torch.rand((27, 6))
#Embed each character in X from C
emb = C[Xtr]
#Let's check the shape of emb
emb.shape
#emb is a 3d vector. Let's convert it into 2d of 27 x 6 where each row is the embedding of the 3 characters in X
it = emb.view(emb.shape[0], 18)
print(it)

tensor([[0.0301, 0.1583, 0.9067,  ..., 0.5022, 0.7398, 0.7591],
        [0.0301, 0.1583, 0.9067,  ..., 0.3247, 0.2461, 0.2333],
        [0.0301, 0.1583, 0.9067,  ..., 0.3738, 0.8638, 0.2045],
        ...,
        [0.9555, 0.2670, 0.9635,  ..., 0.4045, 0.8248, 0.7732],
        [0.3418, 0.2736, 0.5181,  ..., 0.3247, 0.2461, 0.2333],
        [0.3502, 0.1404, 0.3662,  ..., 0.3120, 0.4669, 0.4624]])


In [46]:
#Now the number of training examples x 6 inputs are given as a weighted sum to the hidden layer with 100 neurons
W1 = torch.randn((18, 200))
b1 = torch.randn(200)

#The weighted sums are activated by tanh
h = torch.tanh(it @ W1 + b1)
h.shape

torch.Size([25626, 200])

In [47]:
#The parameters of the hidden layer to the output layer
W2 = torch.randn(200, 27)
b2 = torch.randn(27)

#The output of the last layer
y = h @ W2 + b2

In [48]:
#using the y we got from the neural network and the actual y we will calculate the loss
loss = torch.nn.functional.cross_entropy(y, Ytr)
loss

tensor(24.0209)

In [49]:
#Let's try to segment the data to minibatches
m = torch.randint(0, X.shape[0], (32,))
m

tensor([ 91035, 153337,  46270, 154526, 164606,  77762,   9271,   2387,  22859,
        131215,  70777, 161403, 222269,  84293, 146951, 141718,   5186, 175837,
        226281, 108110, 202319, 143982, 131438,  86076, 136173,    279, 142368,
         89236, 208830,  26930, 208784, 157462])

In [56]:
#On combining the whole code, we need to do a forward pass, find the loss, do a backward pass, update the values and repeat the steps again
#Until the loss does not minimise
parameters = [C, W1, W2, b1, b2]
for p in parameters:
    p.requires_grad = True
    
for _ in range(1000):

    #Minibatch of 32
    inx = torch.randint(0, Xtr.shape[0], (32,))
    
    #forward pass
    emb = C[Xtr[inx]]
    h= torch.tanh(emb.view(inx.shape[0], 18) @ W1 + b1)
    y = h @ W2 + b2

    #Calculate the loss
    loss = torch.nn.functional.cross_entropy(y, Ytr[inx])
    #print(loss.item())

    #Back pass
    for p in parameters:
        p.grad = None
    loss.backward()

    #Update my parameters
    for p in parameters:
        p.data += -0.01 * p.grad

print(loss.item())

2.1890413761138916


In [57]:
#Evaluate on dev data
emb = C[Xdev]
h= torch.tanh(emb.view(Xdev.shape[0], 18) @ W1 + b1)
y = h @ W2 + b2
loss = torch.nn.functional.cross_entropy(y, Ydev)
loss.item()

2.327741861343384

In [60]:
#Sampling from the data

for i in range(20):
    content = [0] * block_size
    #print(content)
    out = []
    while True:
        emb = C[torch.tensor(content)]
        #print(emb)
        h = torch.tanh(emb.view(1, -1) @ W1 + b1)
        y = h @ W2 + b2
        logits = torch.nn.functional.softmax(y, dim=1)
        inx = torch.multinomial(logits, num_samples=1).item()
        content = content[1:] + [inx]
        out.append(inx)
        if inx == 0:
            break

    print(''.join(itos[i] for i in out))
    

kahattin.
cretth.
marie.
aliah.
cellinanmen.
rice.
wenaion.
breatlee.
shawyna.
grah.
mrin.
greng.
grewye.
korla.
bya.
jaka.
thrist.
esvayah.
brries.
gen.
