In [None]:
import torch
import matplotlib.pyplot as plt

In [None]:
# Load the dataset: one entry per line, with the line terminators stripped.
# The context manager guarantees the file handle is closed once the text has
# been read (a bare open(...).read() leaves closing to the garbage collector).
with open('names.txt', 'r') as f:
    words = f.read().splitlines()

In [None]:
# Peek at the first ten entries to sanity-check the load.
words[:10]

In [None]:
# Number of entries (lines) read from the file.
len(words)

In [None]:
# Count how often each bigram (pair of consecutive characters) occurs.
# Every word is padded with '.' on both sides so that "starts a word" and
# "ends a word" are counted as bigrams too.
b = {}
for w in words:
    chs = ['.'] + list(w) + ['.']
    for bigram in zip(chs, chs[1:]):
        b[bigram] = b.get(bigram, 0) + 1

# Most frequent bigrams first: sort the (bigram, count) pairs by descending count.
sorted(b.items(), key=lambda kv: -kv[1])

In [None]:
# 27-by-27 count matrix (one row/column per letter plus the special start/end
# token). Entry [i, j] will store the number of times the bigram (i, j) appears.
N = torch.zeros(27, 27, dtype=torch.int32)


In [None]:
chars = sorted(set(''.join(words)))  # every distinct character in the dataset, sorted
stoi = {s: i + 1 for i, s in enumerate(chars)}  # char -> integer index, starting at 1
stoi['.'] = 0  # index 0 is reserved for the special start/end token
itos = {i: s for s, i in stoi.items()}  # inverse lookup: integer index -> char

# Fill the bigram count matrix: N[i, j] = number of times char j follows char i.
# N is zeroed first so that re-running this cell does not double-count.
N.zero_()
for w in words:
    chs = ['.'] + list(w) + ['.']  # pad with the start/end token
    for ch1, ch2 in zip(chs, chs[1:]):
        N[stoi[ch1], stoi[ch2]] += 1

stoi

In [None]:
#Nice image of N matrix

%matplotlib inline

plt.figure(figsize=(16,16))
plt.imshow(N, cmap='Blues')
for i in range(27):
    for j in range(27):
        chstr = itos[i] + itos[j]
        plt.text(j, i, chstr, ha="center", va="bottom", color='gray')
        plt.text(j, i, N[i, j].item(), ha="center", va="top", color='gray')
plt.axis('off');

In [None]:
# Probability distribution

# Add 1 to every count so that no bigram ends up with probability zero.
P = (N + 1).float()
# Normalise each row to sum to 1: row i is then the probability distribution
# over the character that follows character i.
P = P / P.sum(dim=1, keepdim=True)

In [313]:
# Sample ten words from the count-based bigram model, with a fixed seed.
g = torch.Generator().manual_seed(2147483647)

for sample_no in range(10):
    sampled = []
    ix = 0  # begin at the start/end token
    while True:
        # Draw the next character index from the row belonging to the current one.
        ix = torch.multinomial(P[ix], num_samples=1, replacement=True, generator=g).item()
        sampled.append(itos[ix])
        if ix == 0:  # drew the start/end token again: the word is finished
            break
    print(''.join(sampled))

cexze.
momasurailezitynn.
konimittain.
llayn.
ka.
da.
staiyaubrtthrigotai.
moliellavo.
ke.
teda.


In [351]:
# Quality of the count-based model: average negative log likelihood over every
# bigram in the dataset.

log_likelihood = 0
n = 0
for w in words:
    padded = '.' + w + '.'  # start token + characters + end token
    for first, second in zip(padded, padded[1:]):  # consecutive character pairs
        # Sum log-probabilities instead of multiplying probabilities:
        # log(a*b*c) = log(a) + log(b) + log(c), and the numbers stay manageable.
        log_likelihood += torch.log(P[stoi[first], stoi[second]])
        n += 1

nll = -log_likelihood  # negate so that smaller means better, as a loss should
print(f'{nll/n}')

2.4543561935424805


In [None]:
# Build the bigram training set: xs holds the index of the current character,
# ys the index of the character that follows it.

pairs = []
for w in words[:1]:  # only the first word is used here
    padded = '.' + w + '.'  # start token + characters + end token
    pairs.extend((stoi[cur], stoi[nxt]) for cur, nxt in zip(padded, padded[1:]))

xs = torch.tensor([src for src, tgt in pairs])  # example inputs
ys = torch.tensor([tgt for src, tgt in pairs])  # desired outputs

In [None]:
# Input character indices (one per bigram).
xs

In [None]:
# Target character indices (the character that follows each input).
ys

In [None]:
# Inputs
# The network cannot consume raw integer indices, so each one is turned into a
# 27-dimensional vector with a 1 at the character's index and 0 elsewhere.
import torch.nn.functional as F
xenc = F.one_hot(xs, num_classes=27).to(torch.float32)  # floats are required for the matrix multiply

In [None]:


# One forward pass through the single-layer network, to inspect the loss.

# W is created here so this cell also runs on a fresh kernel; the weight
# initialisation cell further down re-creates it before training. The seeded
# generator keeps the displayed loss reproducible.
W = torch.randn((27, 27), generator=torch.Generator().manual_seed(2147483647), requires_grad=True)

# Z-values under the softmax activation function (creates probabilities)
logits = xenc @ W  # the matrix product is interpreted as log-counts
counts = logits.exp()  # exponentiate to undo the log and get positive "counts"
probs = counts / counts.sum(1, keepdim=True)  # each row becomes a probability distribution
# Classification loss: mean negative log likelihood of the correct next
# character, i.e. probs[i, ys[i]] for every example i.
loss = -probs[torch.arange(ys.shape[0]), ys].log().mean()
loss

In [329]:
# Weight initialisation. IMPORTANT: requires_grad=True so autograd tracks W.
# 27 inputs x 27 neurons (one per character), drawn from a standard normal
# distribution. A dedicated seeded generator makes the starting weights
# reproducible from run to run.
init_gen = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=init_gen, requires_grad=True)

In [330]:
import torch.nn.functional as F

lr = 0.1             # gradient-descent step size
num_epochs = 100     # full passes over the dataset
reg_strength = 0.01  # weight of the L2 penalty on W
total_loss = 0

# Encode every word once, up front: the (one-hot inputs, target indices) pairs
# are identical in every epoch, so rebuilding them inside the loop is wasted work.
dataset = []
for w in words:
    chs = ['.'] + list(w) + ['.']  # pad with the special start/end token
    xs = torch.tensor([stoi[ch] for ch in chs[:-1]])  # inputs: each character ...
    ys = torch.tensor([stoi[ch] for ch in chs[1:]])   # ... targets: the one that follows it
    # One-hot vectors of dimension 27, as floats so they can be multiplied with W.
    dataset.append((F.one_hot(xs, num_classes=27).float(), ys))

for k in range(num_epochs):
    for xenc, ys in dataset:  # one gradient step per word
        # Forward pass: softmax over the 27 possible next characters.
        logits = xenc @ W  # interpreted as log-counts
        counts = logits.exp()  # exponentiate to undo the log and get positive "counts"
        probs = counts / counts.sum(1, keepdim=True)  # rows become probability distributions

        # Loss: mean negative log likelihood of the correct next character, plus
        # an L2 penalty. The penalty pulls the weights towards 0, which smooths
        # the probabilities (similar to adding 1 to every bigram count).
        loss = (-probs[torch.arange(ys.shape[0]), ys].log()).mean() + reg_strength*(W**2).mean()
        # Accumulate a detached copy: summing `loss` itself would keep autograd
        # history attached to the running total for the whole epoch.
        total_loss += loss.detach()

        # Backward pass and update.
        W.grad = None  # clear the previous gradient
        loss.backward()
        with torch.no_grad():
            W -= lr * W.grad

    avg_loss = total_loss / len(words)
    print(f"Epoch: {avg_loss.item()}")
    total_loss = 0

Epoch: 2.583664894104004
Epoch: 2.4645891189575195
Epoch: 2.456129550933838
Epoch: 2.4538660049438477
Epoch: 2.45296049118042
Epoch: 2.452498435974121
Epoch: 2.4522459506988525
Epoch: 2.452084541320801
Epoch: 2.4519758224487305
Epoch: 2.45190691947937
Epoch: 2.451857328414917
Epoch: 2.4518258571624756
Epoch: 2.45180082321167
Epoch: 2.451784372329712
Epoch: 2.451770305633545
Epoch: 2.451756715774536
Epoch: 2.451746702194214
Epoch: 2.4517433643341064


KeyboardInterrupt: 

In [358]:
# Sample from the neural net

# Seeded generator so the samples are reproducible, matching the count-based
# sampler earlier in the notebook.
g = torch.Generator().manual_seed(2147483647)

# Sampling is inference only: no_grad stops autograd from building a graph
# through W (which has requires_grad=True).
with torch.no_grad():
    for i in range(5):

        out = []
        ix = 0  # start from the special start/end token

        while True:

            xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()  # vector representing the character
            logits = xenc @ W  # predicted log-counts
            counts = logits.exp()  # counts, playing the role of a row of N
            p = counts / counts.sum(1, keepdim=True)  # normalise into next-character probabilities

            ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()  # draw a char index from p
            out.append(itos[ix])  # convert the index to its character and add it to the word
            if ix == 0:  # end the generation when '.' appears
                break
        print(''.join(out))

dampaviynt.
baviy.
del.
tirilibenix.
ttaeyiomonzkleves.
