# Single Layer Name-like Word Generator
This is a Python notebook that mimics a character-level bigram model using a single-layer neural network.

In [1]:
# imports and loading the dataset
from pathlib import Path
import torch

data_file = Path("data/names.txt")
# Read the whole file and split on whitespace, giving one list entry per name.
# The encoding is passed explicitly so the result does not depend on the
# platform's default locale encoding.
data = data_file.read_text(encoding="utf-8").split()

In [2]:
# Marker placed before and after every name to denote start / end of a word.
special_char = "."
# Vocabulary: the boundary marker first, then every distinct character found
# in the names in sorted order.
# The marker is inserted explicitly (rather than relying on it appearing as a
# join separator and happening to sort first) so that it is always present and
# always gets index 0, which the sampling cell uses as its start/stop token.
chars = [special_char] + sorted(set("".join(data)) - {special_char})

In [3]:
# Lookup tables between characters and their integer ids.
# itos: id -> character (position of the character in `chars`)
itos = dict(enumerate(chars))
# stoi: character -> id (inverse of itos)
stoi = {ch: idx for idx, ch in itos.items()}

In [4]:
# Build the bigram training set: for every pair of consecutive characters in a
# padded name, the first character id is an input (xs) and the second one is
# the corresponding target (ys).
xs, ys = [], []
for name in data:
    # pad with the boundary marker on both sides, then map to integer ids
    padded = f"{special_char}{name}{special_char}"
    ids = [stoi[ch] for ch in padded]
    # inputs are all ids but the last, targets are the same ids shifted by one
    xs.extend(ids[:-1])
    ys.extend(ids[1:])
# convert the python lists to tensors
xs = torch.tensor(xs)
ys = torch.tensor(ys)
# total number of (input, target) pairs
num = xs.numel()
num, xs, ys

(228146,
 tensor([ 0,  5, 13,  ..., 25, 26, 24]),
 tensor([ 5, 13, 13,  ..., 26, 24,  0]))

In [5]:
# Integer ids are not a meaningful input for a linear layer, so each id in xs
# is turned into a one-hot vector with 27 entries instead.
from torch.nn import functional as F
xenc = F.one_hot(xs, num_classes=27)

In [6]:
# The single layer: a 27x27 weight matrix drawn from a standard normal
# distribution. A dedicated, seeded generator keeps the initialisation
# reproducible; requires_grad=True lets autograd track gradients for W.
g = torch.Generator()
g.manual_seed(10)
W = torch.randn(27, 27, generator=g, requires_grad=True)

In [17]:
# learn
# Full-batch gradient descent on the negative log likelihood of the bigrams.
learning_rate = 10

# The inputs never change between iterations, so the one-hot encoding and the
# row indices are built once up front instead of on every step.
# The number of classes is taken from W so the encoding width always matches
# the number of rows of the weight matrix.
xenc = F.one_hot(xs, num_classes=W.shape[0]).float()
rows = torch.arange(num)

for k in range(5000):

    # forward prop

    # get the logits (one row of "log-counts" per training example)
    logits = xenc @ W
    # get the log-probs: log_softmax(logits) is
    # log(exp(logits) / exp(logits).sum(1, keepdim=True)), i.e. the log of the
    # normalised counts, computed in a numerically stable way
    logprobs = F.log_softmax(logits, dim=1)
    # get neg log likelihood or loss: mean over the log-prob assigned to the
    # correct next character of every example
    loss = -logprobs[rows, ys].mean()
    # loss += 0.01*(W**2).mean() # regularization for more smoothened probabilities
    # equivalent to laplacian correction in bigram approach

    # reset w.grad and prop back
    W.grad = None
    loss.backward()

    # update: the step itself must not be recorded by autograd
    with torch.no_grad():
        W -= learning_rate * W.grad
    if not k%500:
        print(loss.item())

2.454429864883423
2.4544167518615723
2.454404830932617
2.454393148422241
2.45438289642334
2.4543724060058594
2.4543628692626953
2.4543540477752686
2.454345464706421
2.4543375968933105


In [18]:
# sampling from the neural network

g = torch.Generator().manual_seed(46)
print(f"{loss=}")

# Index of the boundary marker: every word starts from it and ends when it is
# sampled again.
boundary_ix = stoi[special_char]

# Sampling only needs forward passes. W requires grad, so without no_grad every
# step would build an autograd graph that is never used.
with torch.no_grad():
    for i in range(5):

        out = []
        ix = boundary_ix

        while True:
            # get encodings; width follows the number of rows of W
            xenc = F.one_hot(torch.tensor([ix]), num_classes=W.shape[0]).float()

            # get prob: softmax over the logits gives the distribution of the
            # next character given the current one
            logits = xenc @ W
            p = torch.softmax(logits, dim=1)

            # draw the next character id from that distribution
            ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
            out.append(itos[ix])
            if ix == boundary_ix:
                break
        print("".join(out))

    

loss=tensor(2.4543, grad_fn=<NegBackward0>)
benamaha.
kynilalos.
stanoustiei.
labrein.
del.


## Concluding
With the neural network approach, we arrived at the same result as the bigram model, but with a more flexible solution.