In [6]:
import torch
import torch.nn.functional as F

In [2]:
#Read the corpus, one entry per line; the with-block closes the file handle
#as soon as the contents have been read instead of leaving it open.
with open('names.txt', 'r') as f:
    words = f.read().splitlines()

In [3]:
#There will be 27 symbols: the 26 characters a-z and one extra character '.'
#N is a 27x27 table of 32-bit integers, all starting at zero.
N = torch.zeros(27, 27, dtype=torch.int32)

In [4]:
#Look-up tables: character -> integer (stoi) and integer -> character (itos)
chars = sorted(set(''.join(words)))  #every distinct character in the corpus, in sorted order
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}  #characters are numbered from 1 ...
stoi['.'] = 0                                              #... so that index 0 is left for '.'
itos = {idx: ch for ch, idx in stoi.items()}               #inverse mapping of stoi

In [5]:
#Creating a dataset for neural network:
#every pair of adjacent characters becomes one (input, target) example.

inputs, targets = [], []

for w in words:
    #Wrap the word in '.' on both sides so its first and last transitions are included
    idxs = [stoi[c] for c in '.' + w + '.']
    inputs.extend(idxs[:-1])   #each character except the closing '.'
    targets.extend(idxs[1:])   #the character that follows it

xs = torch.tensor(inputs)
ys = torch.tensor(targets)
num = xs.nelement()
print(f'The total number of examples or bigrams is:  {num}')

The total number of examples or bigrams is:  228146


In [21]:
#Randomly initialize the weights of 27 neurons, each receiving 27 inputs.
#The generator is seeded with a fixed value so the draw is repeatable.
g = torch.Generator()
g.manual_seed(2147483647)
W = torch.randn(27, 27, generator=g).requires_grad_()

In [22]:
#Gradient Descent on the average negative log likelihood of the targets ys
num_steps = 100      #number of full-batch updates
learning_rate = 50   #step size applied to W.grad
report_last = 5      #print the loss for this many final iterations

#The inputs do not change between steps, so build the one-hot encoding
#and the row index once instead of on every iteration.
xenc = F.one_hot(xs, num_classes = 27).float() #input to the network: One Hot Encoding
rows = torch.arange(num)                       #paired with ys to pick each example's target probability

for k in range(num_steps):

    #Forward Pass
    logits = xenc @ W      #Predict log-counts
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdim = True) #Probabilities for next character

    loss = -probs[rows, ys].log().mean()  #Negative log likelihood

    #Printing loss of the last few iterations
    if k >= num_steps - report_last:
        print(loss.item())

    #Backward Pass
    W.grad = None #Drop the previous gradient so backward() does not accumulate onto it
    loss.backward()

    #Update: in-place step on the leaf tensor, done under no_grad so autograd
    #does not record it (replaces the discouraged write through W.data)
    with torch.no_grad():
        W -= learning_rate * W.grad

2.4738216400146484
2.4735772609710693
2.4733383655548096
2.47310471534729
2.4728758335113525


In [23]:
#Finally sample from the neural network
g = torch.Generator().manual_seed(2147483647)

#W requires grad, so without no_grad every step below would record an
#autograd graph that sampling never uses.
with torch.no_grad():
    for i in range(5):

        out = []
        ix = 0  #index 0 is '.', used both as the starting context and as the stop signal
        while True:
            xenc = F.one_hot(torch.tensor([ix]), num_classes = 27).float()
            logits = xenc @ W #Predict log-counts
            counts = logits.exp()
            p = counts / counts.sum(1, keepdim = True) #Probabilities for next character

            #Draw the next character index from p and feed it back in as the new context
            ix = torch.multinomial(p, num_samples= 1, replacement = True, generator = g).item()
            out.append(itos[ix])
            if ix == 0:
                break
        print(''.join(out))

junide.
janasah.
p.
cfay.
a.
