# Imports

In [2]:
import torch

In [3]:
import torch.nn.functional as F


# loading the dataset

In [4]:
def get_data():
    with open('names.txt', 'r') as f:
        names = f.read().splitlines()
    return names
    
names = get_data()

In [5]:
# creating "stoi" and "itos"
chars = sorted(list(set(''.join(names))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s, i in stoi.items()}



# creating the training data

In [6]:
xs, ys = [], []

for n in names:
    chs = ['.', '.'] + list(n) + ['.','.']
    for chs1, chs2, chs3 in zip(chs, chs[1:], chs[2:]):
        ix1 = stoi[chs1]
        ix2 = stoi[chs2]
        ix3 = stoi[chs3]
        xs.append((ix1, ix2))
        ys.append(ix3)
    

xs = torch.tensor(xs)
ys = torch.tensor(ys)


In [7]:
#one-hot encoded of xs
xenc = F.one_hot(xs, num_classes=27).float()
xenc[0]

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [8]:
xenc[0][0]

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [9]:
xenc.shape

torch.Size([260179, 2, 27])

In [10]:
test = torch.cat([xenc[0][0], xenc[0][1]])
test

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [11]:
xenc_cat = xenc.view(-1, 2*27)
xenc_cat[0]

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

# NN

In [12]:
# initializing W
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((2*27, 27), generator=g, requires_grad=True)

In [13]:
num = xenc.shape[0]
num

260179

In [41]:
for i in range(100):

    logits = xenc_cat @ W #equivalent to count's logarithm
    counts = logits.exp()
    probs = counts/counts.sum(1, keepdim=True)
    loss = -probs[torch.arange(num), ys].log().mean()
    print("loss: ", loss.item())

    W.grad = None
    loss.backward()

    #update
    W.data += -40 * W.grad

    

loss:  2.084049940109253
loss:  2.0836710929870605
loss:  2.0832998752593994
loss:  2.0829362869262695
loss:  2.0825793743133545
loss:  2.0822293758392334
loss:  2.0818865299224854
loss:  2.081549644470215
loss:  2.081219434738159
loss:  2.080894947052002
loss:  2.0805766582489014
loss:  2.08026385307312
loss:  2.0799572467803955
loss:  2.079655647277832
loss:  2.079359292984009
loss:  2.079068422317505
loss:  2.078782558441162
loss:  2.0785017013549805
loss:  2.0782251358032227
loss:  2.077953577041626
loss:  2.077686309814453
loss:  2.0774238109588623
loss:  2.0771656036376953
loss:  2.076911449432373
loss:  2.0766613483428955
loss:  2.076415538787842
loss:  2.0761735439300537
loss:  2.075934886932373
loss:  2.075700521469116
loss:  2.075469732284546
loss:  2.075242280960083
loss:  2.0750181674957275
loss:  2.0747976303100586
loss:  2.074580430984497
loss:  2.074366331100464
loss:  2.074155330657959
loss:  2.0739476680755615
loss:  2.0737428665161133
loss:  2.0735411643981934
loss:  

In [17]:
print(W, W.type)

tensor([[-8.5252e+00,  2.8615e+00,  3.5079e-01,  ..., -1.1317e+00,
          1.2614e+00,  8.2836e-01],
        [ 1.5217e+00,  4.5951e-01, -4.2785e-01,  ..., -1.3107e+00,
         -6.9945e-03, -4.4299e-01],
        [ 4.5454e-01,  9.9407e-01,  9.9703e-02,  ..., -6.1054e-01,
          3.2660e-01, -1.1959e+00],
        ...,
        [ 3.6765e-01, -6.6534e-02, -1.6148e-01,  ..., -4.1084e-01,
          2.2709e-02,  1.4128e-01],
        [ 1.3108e+00,  1.9231e+00, -3.6624e-01,  ..., -1.2010e+00,
         -9.7226e-01,  1.0099e-01],
        [-5.7171e-02,  1.9434e+00,  1.2649e-01,  ..., -1.4299e-01,
          8.7116e-01, -3.8587e-01]], requires_grad=True) <built-in method type of Tensor object at 0x0000023BDC1482D0>


In [43]:
W.shape

torch.Size([54, 27])

# generative model

In [48]:
g = torch.Generator().manual_seed(2147483647)

for i in range(5):
    ix1 = 0 
    ix2 = 0 #because we begin our words from .. which is index 0
    out = []
    while True:
        xenc = F.one_hot(torch.tensor([ix1, ix2]), num_classes=27).float().reshape(-1)
        logits = xenc @ W
        p = logits.exp()/(logits.exp().sum(-1, keepdim=True))
        ix1 = ix2
        ix2 = torch.multinomial(p, num_samples=1, replacement=True, generator=g ).item()
        if ix2 == 0:
            break
        out.append(itos[ix2])

    print(''.join(out))

juwide
janasad
pariay
ainn
koi


In [50]:
out

['k', 'o', 'i']