In [28]:
import torch
import random
from torch._tensor import Tensor
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [29]:
# A single seed value feeds both Python's `random` module (used for shuffling
# the word list) and the explicit torch generator (used for weight init).
SEED = 2147483647

random.seed(SEED)
g = torch.Generator().manual_seed(SEED)

In [30]:


# Read the corpus (one word per line). The context manager closes the file
# handle as soon as the contents are in memory instead of leaving it open
# until garbage collection.
with open('../../assets/names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [31]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set(''.join(words)))

# Character -> index. Real characters are numbered from 1 so that index 0 stays
# free for the '.' marker, which is added last.
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0

# Index -> character, the inverse of `stoi` (same insertion order).
itos = dict(zip(stoi.values(), stoi.keys()))

In [32]:

# Number of preceding character indices that form the context of one example.
block_size = 3


def build_dataset(words: list[str]) -> tuple[Tensor, Tensor]:
    """Convert words into (context, next-character) index pairs.

    Every word is terminated with '.', and each character of the terminated
    word becomes one target. The matching input is the ``block_size`` indices
    that precede it, with index 0 used as padding before the start of the word.

    Args:
        words: Words to encode; every character is looked up in the
            module-level ``stoi`` mapping.

    Returns:
        ``(X, Y)`` where ``X`` holds one context per row and ``Y`` holds the
        index of the character that follows that context.
    """
    contexts, targets = [], []

    for word in words:
        # Left-pad with zeros so the very first character already has a
        # full-length context, then slide a window over the encoded word.
        encoded = [0] * block_size + [stoi[ch] for ch in word + '.']

        for start in range(len(encoded) - block_size):
            stop = start + block_size
            contexts.append(encoded[start:stop])
            targets.append(encoded[stop])

    return torch.tensor(contexts), torch.tensor(targets)

# Shuffle in place so the three splits are random draws from the corpus.
random.shuffle(words)

# Slice boundaries for the 80% / 10% / 10% train / dev / test partition.
training_split = int(len(words) * 0.8)
test_split = int(len(words) * 0.9)

# The training set must stop at `training_split`: building it from the whole
# list would leak every dev and test word into training and make the held-out
# losses meaningless.
Xtr, Ytr = build_dataset(words[:training_split])
Xdev, Ydev = build_dataset(words[training_split:test_split])
Xte, Yte = build_dataset(words[test_split:])


In [33]:
# Embedding table: one 2-dimensional vector per character index.
# NOTE(review): 27 presumably equals len(stoi) (26 letters + '.') - confirm
# against the corpus.
C = torch.randn(27, 2, generator=g, requires_grad=True)

# Hidden (tanh) layer. Its input is the block_size embeddings of one context
# laid side by side, so the row count is derived from block_size and the
# embedding width rather than written as a literal 6; this keeps W1 in step
# with the `block_size * 2` flattening done in the forward pass.
W1 = torch.randn(block_size * C.shape[1], 100, generator=g, requires_grad=True)
B1 = torch.randn(100, generator=g, requires_grad=True)

# Output layer: maps the 100 hidden units to 27 logits, which are passed to
# F.cross_entropy by the training and evaluation functions.
W2 = torch.randn(100, 27, generator=g, requires_grad=True)
B2 = torch.randn(27, generator=g, requires_grad=True)

# The list name is misspelled, but train_model reads it under exactly this
# spelling, so it is kept as-is.
prameters = [C, W1, B1, W2, B2]

# Total number of trainable scalars (displayed as the cell output).
sum(p.nelement() for p in prameters)

3481

In [None]:

def train_model(
    X: Tensor,
    Y: Tensor,
    steps: int = 10000,
    batch_size: int = 32,
    lr: float = 0.01,
) -> float:
    """Train the module-level parameters with minibatch SGD.

    Updates ``C``, ``W1``, ``B1``, ``W2`` and ``B2`` (via ``prameters``) in
    place.

    Args:
        X: Context indices, one example per row.
        Y: Target index for each row of ``X``.
        steps: Number of SGD steps to run.
        batch_size: Number of examples sampled (with replacement) per step.
        lr: Learning rate applied to every parameter.

    Returns:
        The cross-entropy loss of the final minibatch (not an average over
        the run), or ``0.0`` when ``steps`` is 0.
    """
    final_loss = 0.0
    for _ in range(steps):
        # Sample the minibatch from the notebook's seeded generator so that a
        # fresh run reproduces the same sequence of batches; without it the
        # unseeded global torch RNG would be used.
        ix = torch.randint(0, X.shape[0], (batch_size,), generator=g)

        # Forward pass: embed, flatten each context, tanh layer, logits.
        emb = C[X[ix]]
        h = torch.tanh(emb.view(-1, block_size * 2) @ W1 + B1)
        logits = h @ W2 + B2
        loss = F.cross_entropy(logits, Y[ix])

        # Backward pass: clear stale gradients before accumulating new ones.
        for p in prameters:
            p.grad = None
        loss.backward()

        # SGD update, done under no_grad so autograd does not record it.
        with torch.no_grad():
            for p in prameters:
                if p.grad is not None:
                    p -= lr * p.grad

        final_loss = loss.item()

    return final_loss

# Fit on the training tensors; the returned value is the loss of the last
# minibatch seen during training.
loss = train_model(Xtr, Ytr)

print(f"{loss:.2f}")

@torch.no_grad()
def evaluate_model(X, Y: Tensor) -> float:
    """Return the cross-entropy loss of the current parameters on ``(X, Y)``.

    Runs a single forward pass over all of ``X`` at once with gradient
    tracking disabled; the module-level parameters are read but not modified.

    Args:
        X: Context indices, one example per row.
        Y: Target index for each row of ``X``.

    Returns:
        The loss as a Python float.
    """
    embedded = C[X]
    # Lay the embeddings of each context side by side before the hidden layer.
    flattened = embedded.view(-1, block_size * 2)
    hidden = torch.tanh(flattened @ W1 + B1)

    scores = hidden @ W2 + B2
    return F.cross_entropy(scores, Y).item()


2.03


In [22]:
# Training split (80%), dev split (10%), test split (10%)
