In [136]:
import torch
import torch.nn.functional as F
# Ask PyTorch whether a CUDA device is visible; the result is shown as this cell's output.
torch.cuda.is_available()

True

# Import Text Document of Names

In [158]:
# Read the names corpus into a list with one entry per line of the file.
# The `with` block closes the file handle as soon as the text has been read,
# instead of leaving it open until the garbage collector gets to it.
with open('names.txt', 'r', encoding="utf-8") as names_file:
    words = names_file.read().splitlines()

In [159]:
# Preview the first ten entries to sanity-check the load.
words[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

### Create training set of bigrams

In [160]:
# Vocabulary: every distinct character that occurs in any name, in sorted order.
chars = sorted(set(''.join(words)))

# Character -> integer index. Real characters are numbered from 1 so that
# index 0 stays free for '.', the marker for the beginning and end of a word.
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0

# Inverse lookup: integer index -> character.
itos = {idx: ch for ch, idx in stoi.items()}

In [161]:
# Display the character-to-index mapping.
stoi

{'a': 1,
 'b': 2,
 'c': 3,
 'd': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 't': 20,
 'u': 21,
 'v': 22,
 'w': 23,
 'x': 24,
 'y': 25,
 'z': 26,
 '.': 0}

In [162]:
xs, ys = [], []  # xs = input character indices, ys = target (next-character) indices

for w in words:
    # Pad the name with '.' on both sides so the transitions into the first
    # character and out of the last character are also recorded as bigrams.
    # The list is called `padded` (not `chars`) so that it does not overwrite
    # the sorted vocabulary list `chars` that was built earlier.
    padded = ['.'] + list(w) + ['.']
    for char1, char2 in zip(padded, padded[1:]):
        xs.append(stoi[char1])
        ys.append(stoi[char2])

# convert to tensors
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()  # total number of bigram examples

In [163]:
# Report how many (input, target) bigram pairs were built.
print(f'Number of examples: {num}')

Number of examples: 228146


### Initialize Generator for Reproducibility

In [164]:
# Dedicated, explicitly seeded generator so the random initial weights are
# identical on every run.
g = torch.Generator()
g.manual_seed(2147483647)

# 27 x 27 matrix of weights drawn from a standard normal distribution.
# requires_grad=True makes autograd track operations on W so that
# loss.backward() can populate W.grad.
W = torch.randn(27, 27, generator=g, requires_grad=True)

## Optimization

In [166]:
# Full-batch gradient descent on the single-layer bigram model.
# NOTE(review): the saved output below starts near 2.47 and the execution
# counter jumps from 164 to 166, so this cell was presumably run more than
# once on the same W; a fresh-kernel run will likely print different values.

learning_rate = 50  # step size used in the weight update below

# One-hot encode every input index once: each row has length 27 (26 letters
# plus the '.' start/end marker) with a 1 at the character's index and 0
# elsewhere. xs never changes during training, so this does not need to be
# recomputed on every iteration.
x_encoding = F.one_hot(xs, num_classes=27).float()

for k in range(100):  # epochs (each step uses the whole dataset)
    # forward pass
    logits = x_encoding @ W  # (num, 27) @ (27, 27) -> (num, 27); interpreted as log-counts
    counts = logits.exp()  # exponentiate so the "counts" are always positive
    probs = counts / counts.sum(1, keepdim=True)  # normalise each row to sum to 1; exp + normalise is softmax
    loss = -probs[torch.arange(num), ys].log().mean()  # mean negative log likelihood of the target characters
    print(loss.item())

    # backward pass
    W.grad = None  # reset the gradient so it does not accumulate across iterations
    loss.backward()  # backpropagate from the loss to fill in W.grad

    # update weights
    # W.grad is a 27x27 tensor holding the derivative of the loss with respect
    # to each weight. Stepping against it reduces the loss. The update is done
    # under no_grad so it is not itself recorded by autograd.
    with torch.no_grad():
        W -= learning_rate * W.grad

2.4726526737213135
2.4724342823028564
2.4722204208374023
2.472010850906372
2.471806049346924
2.4716053009033203
2.471409320831299
2.4712166786193848
2.4710280895233154
2.470843553543091
2.4706625938415527
2.4704854488372803
2.4703118801116943
2.4701414108276367
2.4699742794036865
2.469810724258423
2.4696500301361084
2.469492197036743
2.4693379402160645
2.4691860675811768
2.4690372943878174
2.468891143798828
2.468747615814209
2.46860671043396
2.468468427658081
2.468332529067993
2.4681990146636963
2.4680681228637695
2.4679393768310547
2.467813014984131
2.467688798904419
2.4675662517547607
2.4674465656280518
2.4673283100128174
2.467212438583374
2.467097759246826
2.4669857025146484
2.4668750762939453
2.466766595840454
2.4666597843170166
2.4665544033050537
2.4664509296417236
2.4663493633270264
2.4662492275238037
2.4661502838134766
2.466053009033203
2.4659576416015625
2.4658634662628174
2.465770721435547
2.465679407119751
2.4655895233154297
2.465500593185425
2.4654133319854736
2.465327501296