# Imports

In [2]:
import torch

In [3]:
import torch.nn.functional as F


# loading the dataset

In [4]:
def get_data():
    with open('names.txt', 'r') as f:
        names = f.read().splitlines()
    return names
    
names = get_data()

In [5]:
# creating "stoi" and "itos"
chars = sorted(list(set(''.join(names))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s, i in stoi.items()}



# creating the training data

In [6]:
xs, ys = [], []

for n in names:
    chs = ['.', '.'] + list(n) + ['.','.']
    for chs1, chs2, chs3 in zip(chs, chs[1:], chs[2:]):
        ix1 = stoi[chs1]
        ix2 = stoi[chs2]
        ix3 = stoi[chs3]
        xs.append((ix1, ix2))
        ys.append(ix3)
    

xs = torch.tensor(xs)
ys = torch.tensor(ys)


In [7]:
#one-hot encoded of xs
xenc = F.one_hot(xs, num_classes=27).float()
xenc[0]

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [8]:
xenc[0][0]

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [9]:
xenc.shape

torch.Size([260179, 2, 27])

In [10]:
test = torch.cat([xenc[0][0], xenc[0][1]])
test

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [11]:
xenc_cat = xenc.view(-1, 2*27)
xenc_cat[0]

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

# NN

In [12]:
# initializing W
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((2*27, 27), generator=g, requires_grad=True)

In [13]:
num = xenc.shape[0]
num

260179

In [51]:
for i in range(100):

    logits = xenc_cat @ W #equivalent to count's logarithm
    counts = logits.exp()
    probs = counts/counts.sum(1, keepdim=True)
    loss = -probs[torch.arange(num), ys].log().mean()
    if i % 20 == 0:
        print("loss: ", loss.item())

    W.grad = None
    loss.backward()

    #update
    W.data += -40 * W.grad

    

loss:  2.0649526119232178
loss:  2.0632147789001465
loss:  2.0617709159851074
loss:  2.0605547428131104
loss:  2.0595171451568604


In [52]:
print(W, W.type)

tensor([[-10.5057,   2.8836,   0.5967,  ...,  -0.9788,   1.3044,   1.1338],
        [  1.5782,   0.3635,  -0.2223,  ...,  -1.5943,  -0.1607,  -0.2388],
        [  0.7839,   1.3579,  -0.0716,  ...,  -0.4852,   0.6638,  -1.2331],
        ...,
        [  0.2618,   0.4977,  -0.4188,  ...,   0.5696,   0.2397,   0.0908],
        [  1.3077,   1.9957,  -0.6259,  ...,  -0.9937,  -1.6430,   0.1324],
        [  0.0848,   2.2522,  -0.2436,  ...,  -0.3140,   1.5923,   0.2865]],
       requires_grad=True) <built-in method type of Tensor object at 0x0000023BDC1482D0>


In [53]:
W.shape

torch.Size([54, 27])

# generative model

In [54]:
g = torch.Generator().manual_seed(2147483647)

for i in range(5):
    ix1 = 0 
    ix2 = 0 #because we begin our words from .. which is index 0
    out = []
    while True:
        xenc = F.one_hot(torch.tensor([ix1, ix2]), num_classes=27).float().reshape(-1)
        logits = xenc @ W
        p = logits.exp()/(logits.exp().sum(-1, keepdim=True))
        ix1 = ix2
        ix2 = torch.multinomial(p, num_samples=1, replacement=True, generator=g ).item()
        if ix2 == 0:
            break
        out.append(itos[ix2])

    print(''.join(out))

juwide
janasad
pariay
ainn
koi
