In [1]:
import pandas as pd
import torch

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/names-txt/LICENSE
/kaggle/input/names-txt/README.md
/kaggle/input/names-txt/makemore.py
/kaggle/input/names-txt/names.txt


In [2]:
words = pd.read_csv('/kaggle/input/names-txt/names.txt', header = None)
words.columns = ['Names']

In [3]:
words.head()

Unnamed: 0,Names
0,emma
1,olivia
2,ava
3,isabella
4,sophia


## Bigram vs Trigram langauge Model

In [4]:
chs = set()
for w in words['Names']:
    for char in w:
        chs.add(char)
chs

{'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z'}

In [5]:
idx = 1
stoi = {}
for char in chs:
    stoi[char] = idx
    idx += 1
stoi['.'] = 0
stoi

{'r': 1,
 'n': 2,
 'c': 3,
 'k': 4,
 'w': 5,
 'x': 6,
 'v': 7,
 'a': 8,
 'y': 9,
 'm': 10,
 'u': 11,
 'q': 12,
 'f': 13,
 'z': 14,
 'o': 15,
 'h': 16,
 'g': 17,
 't': 18,
 'd': 19,
 's': 20,
 'i': 21,
 'p': 22,
 'e': 23,
 'b': 24,
 'j': 25,
 'l': 26,
 '.': 0}

In [6]:
itos = {v: k for k, v in stoi.items()}
itos

{1: 'r',
 2: 'n',
 3: 'c',
 4: 'k',
 5: 'w',
 6: 'x',
 7: 'v',
 8: 'a',
 9: 'y',
 10: 'm',
 11: 'u',
 12: 'q',
 13: 'f',
 14: 'z',
 15: 'o',
 16: 'h',
 17: 'g',
 18: 't',
 19: 'd',
 20: 's',
 21: 'i',
 22: 'p',
 23: 'e',
 24: 'b',
 25: 'j',
 26: 'l',
 0: '.'}

#### Bigram

Populating Dataset

In [7]:
xs, ys = [], []
for w in words['Names']:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        xs.append(ix1)
        ys.append(ix2)
xs = torch.tensor([xs])
ys = torch.tensor([ys])

In [8]:
xs.shape

torch.Size([1, 228146])

Random Initialisation

In [9]:
g = torch.Generator().manual_seed(2**31 - 1)
w = torch.randn((27, 27), generator = g, requires_grad = True)

Gradient Descent

In [10]:
epoch = 200
for k in range(1, epoch+1):
    # forward pass
    xenc = torch.nn.functional.one_hot(xs, num_classes = 27).float()
    logits = xenc @ w
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdims = True)
    loss = -probs[torch.arange(len(ys)), ys].log().mean() + 0.01*(w**2).mean()
    
    # backward pass
    w.grad = None
    loss.backward()
    
    # update
    w.data += -50*w.grad
    
    # print
    if (k%50 == 0):
        print(f"epoch = {k}\n----------")

epoch = 50
----------
epoch = 100
----------
epoch = 150
----------
epoch = 200
----------


In [11]:
g = torch.Generator().manual_seed(2**31 - 1)
for i in range(5):
    out = []
    ix = 0
    while True:
        xenc = torch.nn.functional.one_hot(torch.tensor([ix]), num_classes = 27).float()
        logits = xenc @ w
        counts = logits.exp()
        p = counts / counts.sum(1, keepdims = True)
        
        ix = torch.multinomial(p, num_samples = 1, replacement = True, generator = g).item()
        chs = itos[ix]
        out.append(chs)
        if (ix == 0):
            break
    print(''.join(out))

miemkfkyhurgrl.
h.
ixnjeoczeg.
iyytqsolcovdxvellikqazhrijknyqwparmuknkiyzteynsqkdzmjywpjqrxsnlxxpfifsajxoksifm.
hxjsdlemgtmrvg.


#### Trigram

Populating dataset

In [12]:
xs, ys = [], []
for w in words['Names']:
    chs = ['.', '.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        ix3 = stoi[ch3]
        xs.append([ix1, ix2])
        ys.append(ix3)

In [13]:
xs = torch.tensor(xs)
ys = torch.tensor(ys).reshape(len(ys), -1)

In [14]:
xs, ys

(tensor([[ 0,  0],
         [ 0, 23],
         [23, 10],
         ...,
         [14,  9],
         [ 9, 14],
         [14,  6]]),
 tensor([[23],
         [10],
         [10],
         ...,
         [14],
         [ 6],
         [ 0]]))

Random Initialisation

In [15]:
g = torch.Generator().manual_seed(2**31 - 1)
w = torch.randn((27*2, 27), generator = g, requires_grad = True)

Gradient Descent

In [16]:
x = torch.tensor([[1, 0, 1], [0, 1, 1]]).T
print(x)
xenc = torch.nn.functional.one_hot(x, num_classes = 27).float().reshape(3, 54)
print(xenc)
print(xenc.shape)

tensor([[1, 0],
        [0, 1],
        [1, 1]])
tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
torch.Size([3, 54])


In [17]:
batch_size = 64
epoch = 200
learning_rate = 50
reg_strength = 0.01

for k in range(1, epoch + 1):
    # Shuffle the data
    perm = torch.randperm(xs.size(0))
    xs_shuffled = xs[perm]
    ys_shuffled = ys[perm]
    
    for i in range(0, xs.size(0), batch_size):
        x_batch = xs_shuffled[i:i + batch_size]
        y_batch = ys_shuffled[i:i + batch_size]
        
        # Forward pass
        x1 = x_batch[:, 0]
        x2 = x_batch[:, 1]
        x_enc = torch.cat((torch.nn.functional.one_hot(x1, num_classes=27), torch.nn.functional.one_hot(x2, num_classes=27)), dim=1).float()
        logits = x_enc @ w
        counts = logits.exp()
        probs = counts / counts.sum(1, keepdims=True)
        loss = -probs[torch.arange(x_batch.size(0)), y_batch].log().mean() + reg_strength * (w**2).mean()
        
        # Backward pass
        w.grad = None
        loss.backward()
        
        # Update
        w.data -= learning_rate * w.grad
    
    # Print
    if k % 50 == 0:
        print(f"epoch = {k}, loss = {loss.item()}")

epoch = 50, loss = 3.0078113079071045
epoch = 100, loss = 3.2045493125915527
epoch = 150, loss = 2.9018166065216064
epoch = 200, loss = 3.0016345977783203


Sampling

In [18]:
torch.nn.functional.one_hot(torch.tensor([[0], [1]]), num_classes = 27).float().reshape(1, 54)

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [19]:
g = torch.Generator().manual_seed(2**31 - 1)
for i in range(5):
    out = []
    ix = 0
    while True:
        # incase of ix+1 == 27, we round it down to 0, using mod
        xenc = torch.nn.functional.one_hot(torch.tensor([[ix], [(ix+1)%27]]), num_classes = 27).float().reshape(1, 54)
        logits = xenc @ w
        counts = logits.exp()
        p = counts / counts.sum(1, keepdims = True)
        
        ix = torch.multinomial(p, num_samples = 1, replacement = True, generator = g).item()
        chs = itos[ix]
        out.append(chs)
        if (ix == 0):
            break
    print(''.join(out))

miemkaayrergrlai.
axrjerynee.
iayaesoaaaea.
vellieeazyriernnnaparm.
ankiyateynnald.
