In [2]:
import torch
from torch import nn

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU,
# then make that choice the default device for tensors created from here on.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
torch.set_default_device(device)
print(device)

cuda


In [7]:
# Read the whole corpus up front; the file only needs to stay open for the read.
with open("names.txt", 'r', encoding="UTF-8") as f:
    text = f.read()

# Every line of the file is treated as one word.
words = text.splitlines()
# Vocabulary: each distinct character that appears in any word, in sorted order.
chars = sorted(set("".join(words)))
print(chars)

# Lookup tables between characters and integer ids. Ids start at 1 so that
# id 0 stays free for the "." boundary marker registered below.
stoi = {symbol: position for position, symbol in enumerate(chars, start=1)}
itos = dict(enumerate(chars, start=1))

stoi["."] = 0
itos[0] = "."

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [14]:
# Build bigram training pairs: for every adjacent pair of characters in a word
# (with "." padded on both ends), the first character is the input and the
# second is the label. Only the first word is used here to keep the example small.
input_ids = []
label_ids = []
for word in words[:1]:
    padded = [".", *word, "."]
    for current, following in zip(padded, padded[1:]):
        input_ids.append(stoi[current])    # char before the label
        label_ids.append(stoi[following])  # char expected after `current`

inputs = torch.tensor(input_ids)
labels = torch.tensor(label_ids)
print(inputs)
print(labels)

tensor([ 0,  5, 13, 13,  1], device='cuda:0')
tensor([ 5, 13, 13,  1,  0], device='cuda:0')


In [31]:
import torch.nn.functional as F

# One-hot encode the input ids over the 27-symbol vocabulary (26 letters plus
# "."), then cast from integers to float32 so the rows can be multiplied by a
# float weight matrix.
one_hot_inputs = F.one_hot(inputs, num_classes=27).float()
print(one_hot_inputs)



tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0')


In [35]:
# Creating the first neuron: a single linear unit with one weight per input class.
W = torch.randn((27, 1))  # each of the 27 one-hot positions has its own weight

# Shapes for this example:
#   one_hot_inputs is 5x27 --> 5 letters, and there are 27 possible characters
#   5x27 @ 27x1 --> 5x1
#   @ --> PyTorch's matrix-multiplication operator
# (These notes are real comments rather than a trailing string literal, so the
# cell's only output is the printed tensor.)
outputs = one_hot_inputs @ W
print(outputs)

tensor([[-0.0183],
        [-0.8126],
        [ 0.4512],
        [ 0.4512],
        [ 0.2537]], device='cuda:0')


'\n5x27 @ 27x1 --> 5x1\n@ --> pytorch matrix multiplication operator\n'

In [65]:
# Forward pass of a single linear layer with 27 neurons: one output per
# possible next character.
W = torch.randn((27, 27), requires_grad=True)
# Shapes for this example: 5x27 @ 27x27 --> 5x27
logits = one_hot_inputs @ W

# These two lines are a softmax applied to the layer's output: exponentiate
# the logits, then normalise every row so that it sums to 1.
counts = torch.exp(logits)
probs = torch.div(counts, counts.sum(dim=1, keepdim=True))

# For each example, pick the probability the model assigned to the true next
# character. The row index is derived from `labels` instead of a literal 5, so
# this keeps working when more than the first word is used to build the data.
example_rows = torch.arange(labels.shape[0], device=labels.device)
probs_of_guessing_correctly = probs[example_rows, labels]
print(probs_of_guessing_correctly)  # probability of guessing each letter correctly

# Loss: mean negative log-likelihood of the correct next characters.
negative_log_likelihood_mean = -probs_of_guessing_correctly.log().mean()
loss = negative_log_likelihood_mean
print(negative_log_likelihood_mean)

tensor([0.0202, 0.0130, 0.0241, 0.0224, 0.0346], device='cuda:0',
       grad_fn=<IndexBackward0>)
tensor(3.8258, device='cuda:0', grad_fn=<NegBackward0>)


In [49]:
"""
#Other way of doing this

class BigramNN(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.layer1 = nn.Linear(in_features=27, out_features=27, bias=False, dtype=torch.float32)

    def forward(self, inputs):
        return self.layer1(inputs)

model = BigramNN()
model.eval()
with torch.inference_mode():
    logits = model(one_hot_inputs)
    preds = torch.softmax(logits, dim=1)
print(preds)
"""

tensor([[0.0311, 0.0372, 0.0345, 0.0398, 0.0349, 0.0431, 0.0359, 0.0421, 0.0396,
         0.0399, 0.0340, 0.0376, 0.0311, 0.0387, 0.0328, 0.0439, 0.0435, 0.0331,
         0.0369, 0.0407, 0.0400, 0.0309, 0.0328, 0.0421, 0.0375, 0.0356, 0.0309],
        [0.0437, 0.0317, 0.0362, 0.0445, 0.0434, 0.0358, 0.0341, 0.0333, 0.0320,
         0.0408, 0.0382, 0.0432, 0.0401, 0.0372, 0.0398, 0.0362, 0.0317, 0.0316,
         0.0363, 0.0326, 0.0369, 0.0381, 0.0438, 0.0361, 0.0376, 0.0308, 0.0344],
        [0.0368, 0.0416, 0.0389, 0.0330, 0.0391, 0.0415, 0.0430, 0.0328, 0.0319,
         0.0383, 0.0343, 0.0361, 0.0350, 0.0427, 0.0438, 0.0419, 0.0418, 0.0381,
         0.0378, 0.0333, 0.0338, 0.0330, 0.0416, 0.0309, 0.0315, 0.0339, 0.0337],
        [0.0368, 0.0416, 0.0389, 0.0330, 0.0391, 0.0415, 0.0430, 0.0328, 0.0319,
         0.0383, 0.0343, 0.0361, 0.0350, 0.0427, 0.0438, 0.0419, 0.0418, 0.0381,
         0.0378, 0.0333, 0.0338, 0.0330, 0.0416, 0.0309, 0.0315, 0.0339, 0.0337],
        [0.0345, 0.0343,

In [66]:
print(f"previous loss --> {loss}")

# Backward propagation.
# Clear any gradient left from an earlier step so it does not accumulate,
# then back-propagate the current loss into W.grad.
W.grad = None
loss.backward()

# A positive entry in the gradient means the loss grows when that weight grows,
# so the weight has to go down; a negative entry means it has to go up.
# print(W.grad)

# Simple gradient descent. The in-place update runs under no_grad so autograd
# does not record it, instead of going through `.data`, which bypasses
# autograd's in-place tracking.
learning_rate = 0.1
with torch.no_grad():
    W += -learning_rate * W.grad

# Forward pass again with the updated weights (softmax written out by hand).
logits = one_hot_inputs @ W
counts = torch.exp(logits)
probs = torch.div(counts, counts.sum(dim=1, keepdim=True))

# Probability assigned to the true next character of every example. The row
# index is derived from `labels` rather than a literal 5, so the cell also
# works when the dataset contains more than five bigrams.
example_rows = torch.arange(labels.shape[0], device=labels.device)
probs_of_guessing_right = probs[example_rows, labels]
negative_log_likelihood_mean = -probs_of_guessing_right.log().mean()
loss = negative_log_likelihood_mean


print(f"current loss --> {loss}")

previous loss --> 3.825788974761963
current loss --> 3.8046321868896484


In [None]:
#Loop
# Epoch count, presumably for a training loop that repeats the
# forward/backward/update step -- TODO confirm against the loop body.
epochs = 10
