In [1]:
import torch

In [2]:
# Load the training corpus: one name per line of names.txt.
# The context manager closes the file handle as soon as the contents are read,
# instead of leaving it open until garbage collection.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [3]:
# Bigram count table, initialised to zero: 27 rows by 27 columns of int32.
N = torch.zeros(27, 27, dtype=torch.int32)

In [4]:
# Vocabulary: every distinct character that appears in the corpus, sorted.
chars = sorted(set(''.join(words)))

# Character -> integer index. Corpus characters are numbered from 1 so that
# index 0 can be assigned to the '.' token.
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0

# Integer index -> character (the inverse mapping of stoi).
itos = {idx: ch for ch, idx in stoi.items()}

In [5]:

# Accumulate bigram counts: N[a, b] is incremented once for every place where
# the character with index b directly follows the character with index a.
# Each word is wrapped in '.' on both sides so its first and last characters
# are counted as following / preceding the '.' token.
for word in words:
    padded = '.' + word + '.'
    for prev_ch, next_ch in zip(padded, padded[1:]):
        N[stoi[prev_ch], stoi[next_ch]] += 1

In [47]:
# Row 0 of N holds the counts of characters that follow the '.' token.
# Converting to float and dividing by the row total turns it into a
# probability distribution.
first_row_counts = N[0].float()
p = first_row_counts / first_row_counts.sum()
p

tensor([0.0000, 0.1377, 0.0408, 0.0481, 0.0528, 0.0478, 0.0130, 0.0209, 0.0273,
        0.0184, 0.0756, 0.0925, 0.0491, 0.0792, 0.0358, 0.0123, 0.0161, 0.0029,
        0.0512, 0.0642, 0.0408, 0.0024, 0.0117, 0.0096, 0.0042, 0.0167, 0.0290])

In [6]:
# Add-one smoothing: adding 1 to every count guarantees that no entry of the
# table is zero before normalisation.
smoothed_counts = (N + 1).float()
# Divide every row by its own total so that each row of P sums to 1.
P = smoothed_counts / smoothed_counts.sum(dim=1, keepdim=True)

In [7]:
# Draw 5 names from the probability table P using a fixed seed.
g = torch.Generator().manual_seed(2147483647)

for sample_no in range(5):
    ix = 0                      # every name starts from index 0 (the '.' token)
    out = []
    while True:
        next_dist = P[ix]       # distribution over the character following ix
        ix = torch.multinomial(next_dist, num_samples=1, replacement=True, generator=g).item()
        out.append(itos[ix])
        if ix == 0:             # index 0 drawn again: the name is finished
            break
    print(''.join(out))

junide.
janasah.
p.
cony.
a.


In [8]:
# Evaluate the bigram table: average negative log-likelihood of every bigram
# in `words` under the probabilities in P.
log_likelihood = 0
n = 0

for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        prob = P[ix1, ix2]
        logprob = torch.log(prob)
        log_likelihood += logprob
        n += 1
        # This loop only reads P. It must not increment N: doing so would
        # inflate the count matrix every time this evaluation cell is run.

nll = -log_likelihood/n              # average negative log likelihood
nll

tensor(2.4544)

In [None]:
# Below, a neural network is trained to do the same thing: predict the next character from the current one.

In [9]:
# Build the training set for the neural network: for every bigram, the index
# of the first character goes into xs and the index of the second into ys.

xs, ys = [], []

for word in words:
    padded = '.' + word + '.'
    for ch_in, ch_out in zip(padded, padded[1:]):
        xs.append(stoi[ch_in])
        ys.append(stoi[ch_out])

# Convert the index lists to tensors and record how many examples there are.
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print(f'The total number of examples or bigrams is:  {num}')

The total number of examples or bigrams is:  228146


In [10]:
import torch.nn.functional as F

In [11]:
# Weight matrix of the network: 27 x 27 values drawn from a standard normal
# distribution with a fixed seed, with gradient tracking enabled.
g = torch.Generator()
g.manual_seed(2147483647)
W = torch.randn(27, 27, generator=g, requires_grad=True)

In [17]:
#Gradient Descent
NUM_STEPS = 100       # number of full-batch gradient steps
LEARNING_RATE = 50    # step size applied to the gradient in the update

# The one-hot encoding of the inputs does not depend on W, so it is built once
# here instead of being re-created on every iteration.
xenc = F.one_hot(xs, num_classes = 27).float() #input to the network: One Hot Encoding

for k in range(NUM_STEPS):

    #Forward Pass
    logits = xenc @ W      #Predict log-counts
    counts = logits.exp()  #Equivalent to N
    probs = counts / counts.sum(1, keepdims = True) #probabilities for next character
    #the two lines above (counts, then probs) together form the 'softmax' of the logits.

    loss = -probs[torch.arange(num), ys].log().mean()  #Negative log likelihood
    print(loss.item())

    #Backward Pass
    W.grad = None # Discard the previous gradient so backward() does not accumulate into it
    loss.backward()

    #Update
    # Update W in place under no_grad instead of through W.data, so the step
    # is not recorded by autograd and uses the supported API.
    with torch.no_grad():
        W -= LEARNING_RATE * W.grad

2.4726529121398926
2.4724340438842773
2.4722201824188232
2.472010850906372
2.4718058109283447
2.4716053009033203
2.471409320831299
2.471216917037964
2.4710283279418945
2.470843553543091
2.4706625938415527
2.4704854488372803
2.4703118801116943
2.4701414108276367
2.4699742794036865
2.4698104858398438
2.4696500301361084
2.469492197036743
2.4693377017974854
2.4691858291625977
2.4690372943878174
2.468891143798828
2.468747615814209
2.46860671043396
2.468468427658081
2.468332529067993
2.4681990146636963
2.4680681228637695
2.4679393768310547
2.4678127765655518
2.46768856048584
2.4675662517547607
2.4674463272094727
2.467327833175659
2.467211961746216
2.467097759246826
2.4669857025146484
2.4668753147125244
2.466766357421875
2.4666597843170166
2.466554641723633
2.4664509296417236
2.4663491249084473
2.4662492275238037
2.4661505222320557
2.4660532474517822
2.4659576416015625
2.4658634662628174
2.4657704830169678
2.465679407119751
2.4655895233154297
2.465500593185425
2.4654135704040527
2.46532750129

In [18]:
#Finally sample from the neural network
g = torch.Generator().manual_seed(2147483647)

# Sampling is inference only. W has requires_grad=True, so without no_grad
# every forward pass below would record an autograd graph that is never used.
with torch.no_grad():
    for i in range(5):

        out = []
        ix = 0
        while True:
            xenc = F.one_hot(torch.tensor([ix]), num_classes = 27).float()
            logits = xenc @ W #Predict log-counts
            counts = logits.exp() #counts is equivalent to N
            p = counts / counts.sum(1, keepdims = True) #Probabilities for next character

            ix = torch.multinomial(p, num_samples= 1, replacement = True, generator = g).item()
            out.append(itos[ix])
            if ix == 0: # index 0 drawn: the name is finished
                break
        print(''.join(out))

junide.
janasah.
prelay.
a.
nn.
