In [877]:
# Load the corpus (one entry per line of Names.txt) and normalise it to lower case.
# The context manager closes the file handle as soon as the text has been read.
with open('Names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
words = [x.lower() for x in words]

# Vocabulary: every distinct character that occurs in the corpus, in sorted order.
chars = sorted(set(''.join(words)))

# Map each character to an integer id starting at 1; id 0 is reserved for the
# '<>' marker that is placed at the start and end of every word.
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['<>'] = 0

In [435]:
# Build the (input, target) index pairs for the bigram model from the first word only.
xs, ys = [], []

for word in words[:1]:
    # Wrap the word in the '<>' boundary marker and encode every symbol once;
    # each id is the input for the id that immediately follows it.
    symbols = ['<>', *word, '<>']
    ids = [stoi[sym] for sym in symbols]
    xs.extend(ids[:-1])
    ys.extend(ids[1:])

xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [448]:
import torch.nn.functional as F

# Seeded generator so the random 27x27 weight matrix is the same on every run.
g = torch.Generator()
g.manual_seed(9845165659)
W = torch.randn(27, 27, generator=g)

# Forward pass: one-hot inputs -> logits -> exponentiate -> normalise each row
# so that every row of `probs` sums to 1.
xenc = F.one_hot(xs, num_classes=27).to(torch.float32)
logits = torch.matmul(xenc, W)
counts = torch.exp(logits)
probs = counts / torch.sum(counts, dim=1, keepdim=True)

In [506]:
# Inspect the first few bigrams one by one: print the probabilities the network
# assigns and the negative log likelihood of the character that actually follows.
itos = {i:s for s,i in stoi.items()}

# Show at most 5 examples. Capping at the number of available bigrams avoids an
# IndexError on short inputs and keeps unused zero slots out of the average.
n_examples = min(5, xs.nelement())
nlls = torch.zeros(n_examples)
for i in range(n_examples):
    x = xs[i].item()
    y = ys[i].item()
    print ("*---------------*")
    print (f'Bigram example {i+1}: {itos[x]}{itos[y]} (indexes {x}, {y})')
    print (f'Input to the neural net = {x}')
    print (f'Output probabilities from the neural net:', probs[i])
    print (f'Actual next character = {y}')
    # NOTE(review): ':4f' means a minimum field width of 4 with the default six
    # decimals; '.4f' (four decimals) may have been intended - confirm.
    print (f'Probability assigned by the neural net to the next actual character: {probs[i,y].item():4f}')
    logprob = torch.log(probs[i,y])
    nll = -logprob
    print (f'Negative log likelihood: {nll.item():4f}')
    nlls[i] = nll

print (f'Average negative log likelihood: {nlls.mean().item()}')

*---------------*
Bigram example 1: <>l (indexes 0, 12)
Input to the neural net = 0
Output probabilities from the neural net: tensor([0.0041, 0.0358, 0.0143, 0.0129, 0.0369, 0.0173, 0.0201, 0.0922, 0.0386,
        0.0026, 0.0700, 0.0177, 0.0206, 0.0151, 0.1510, 0.0316, 0.0117, 0.0061,
        0.0411, 0.0630, 0.0050, 0.0251, 0.0124, 0.0555, 0.0034, 0.1864, 0.0095])
Actual next character = 12
Probability assigned by the neural net to the next actual character: 0.020574
Negative log likelihood: 3.883740
*---------------*
Bigram example 2: li (indexes 12, 9)
Input to the neural net = 12
Output probabilities from the neural net: tensor([0.0217, 0.0066, 0.0301, 0.0273, 0.0028, 0.0153, 0.0330, 0.0645, 0.1567,
        0.0352, 0.0256, 0.0076, 0.0116, 0.0357, 0.0503, 0.1076, 0.0339, 0.0065,
        0.1230, 0.0327, 0.0500, 0.0073, 0.0342, 0.0216, 0.0247, 0.0232, 0.0112])
Actual next character = 9
Probability assigned by the neural net to the next actual character: 0.035201
Negative log likelihood

In [582]:
# ********* Start of the Neural Network Implementation Of The Above Bigram Model *********

In [903]:
# Assemble the bigram training set from the entire word list.
xs, ys = [], []

for word in words:
    # Encode the word with the '<>' marker on both ends; consecutive ids form
    # one (input, target) pair each.
    encoded = [stoi[sym] for sym in ['<>', *word, '<>']]
    xs += encoded[:-1]
    ys += encoded[1:]

xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print(f'Total number of input samples: {num}')

Total number of input samples: 13576


In [904]:
# One-off random initialisation of the 27x27 weight matrix. Re-running this cell
# replaces W and therefore discards any weights that have been optimised since.
g = torch.Generator()
g.manual_seed(9845165659)
W = torch.randn(27, 27, generator=g).requires_grad_(True)

In [916]:
# Gradient descent optimisation of the neural network
import torch.nn.functional as F

# Step size applied to the gradient on every weight update.
learning_rate = 10

# The one-hot encoding of the inputs does not depend on W, so build it once
# instead of rebuilding it on every iteration.
xenc = F.one_hot(xs, num_classes=27).float()

for Optimisation in range (100):

    # Forward pass of the neural network
    logits = xenc @ W
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdim = True)
    # Mean negative log-probability assigned to the true next characters.
    loss = - probs[torch.arange(num), ys].log().mean()
    print(f'Loss=: {loss}')

    # Backward pass of the neural network
    W.grad = None  # clear the previous gradient before backpropagating
    loss.backward()

    # Fine-tuning the weights of the neural network. no_grad keeps the in-place
    # update out of the autograd graph without going through the `.data` attribute.
    with torch.no_grad():
        W -= learning_rate * W.grad

Loss=: 2.4055187702178955
Loss=: 2.405505895614624
Loss=: 2.4054927825927734
Loss=: 2.405480146408081
Loss=: 2.4054670333862305
Loss=: 2.405454397201538
Loss=: 2.4054415225982666
Loss=: 2.405428409576416
Loss=: 2.4054157733917236
Loss=: 2.4054031372070312
Loss=: 2.405390501022339
Loss=: 2.4053776264190674
Loss=: 2.405364990234375
Loss=: 2.4053523540496826
Loss=: 2.4053397178649902
Loss=: 2.4053268432617188
Loss=: 2.4053146839141846
Loss=: 2.405301809310913
Loss=: 2.405289649963379
Loss=: 2.4052767753601074
Loss=: 2.4052646160125732
Loss=: 2.40525221824646
Loss=: 2.4052395820617676
Loss=: 2.405226945877075
Loss=: 2.405214786529541
Loss=: 2.4052023887634277
Loss=: 2.4051902294158936
Loss=: 2.4051778316497803
Loss=: 2.405165433883667
Loss=: 2.405153274536133
Loss=: 2.4051411151885986
Loss=: 2.4051289558410645
Loss=: 2.405116558074951
Loss=: 2.405104398727417
Loss=: 2.405092239379883
Loss=: 2.4050800800323486
Loss=: 2.4050681591033936
Loss=: 2.4050559997558594
Loss=: 2.405043840408325
Loss

In [917]:
# Sampling from the Neural Network

g = torch.Generator().manual_seed(666)

# Sampling only needs forward passes. W was created with requires_grad=True, so
# without no_grad every step would record an autograd graph that is never used.
with torch.no_grad():
    for i in range (10):
        out = []
        index = 0  # id 0 is the '<>' marker, so every sample starts at a word boundary
        while True:
            # Probability distribution over the next character given the current one.
            xenc = F.one_hot(torch.tensor([index]), num_classes=27).float()
            logits = xenc @ W
            counts = logits.exp()
            probs = counts / counts.sum(1, keepdim = True)

            index = torch.multinomial(probs, num_samples = 1, replacement = True, generator = g).item()
            # The symbol is appended before the stop check, so each printed
            # sample ends with the '<>' marker.
            out.append(itos[index])
            if index == 0:
                break
        print(''.join(out))

ka<>
ahute<>
gethan<>
verinian<>
ryan<>
kyman<>
hleruuttelen<>
h<>
ke<>
kyniovelilesalirqesieriaderl<>
