In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/names-txt/LICENSE
/kaggle/input/names-txt/README.md
/kaggle/input/names-txt/makemore.py
/kaggle/input/names-txt/names.txt


In [2]:
words = pd.read_csv('/kaggle/input/names-txt/names.txt', header = None)
words.columns = ['Names']

In [3]:
words.head()

Unnamed: 0,Names
0,emma
1,olivia
2,ava
3,isabella
4,sophia


## Bigram vs Trigram langauge Model

In [4]:
chs = set()
for w in words['Names']:
    for char in w:
        chs.add(char)
chs

{'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z'}

In [5]:
idx = 1
stoi = {}
for char in chs:
    stoi[char] = idx
    idx += 1
stoi['.'] = 0
stoi

{'x': 1,
 'h': 2,
 'k': 3,
 'z': 4,
 's': 5,
 'f': 6,
 'q': 7,
 'u': 8,
 'i': 9,
 'm': 10,
 'y': 11,
 'g': 12,
 'v': 13,
 'd': 14,
 'b': 15,
 'j': 16,
 'a': 17,
 't': 18,
 'n': 19,
 'r': 20,
 'l': 21,
 'e': 22,
 'p': 23,
 'w': 24,
 'o': 25,
 'c': 26,
 '.': 0}

In [6]:
itos = {}
for key, val in stoi.items():
    itos[val] = key
itos

{1: 'x',
 2: 'h',
 3: 'k',
 4: 'z',
 5: 's',
 6: 'f',
 7: 'q',
 8: 'u',
 9: 'i',
 10: 'm',
 11: 'y',
 12: 'g',
 13: 'v',
 14: 'd',
 15: 'b',
 16: 'j',
 17: 'a',
 18: 't',
 19: 'n',
 20: 'r',
 21: 'l',
 22: 'e',
 23: 'p',
 24: 'w',
 25: 'o',
 26: 'c',
 0: '.'}

#### Bigram

Populating Dataset

In [7]:
xs, ys = [], []
for w in words['Names']:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        xs.append(ix1)
        ys.append(ix2)
xs = torch.tensor(xs)
ys = torch.tensor(ys)

Random Initialisation

In [8]:
g = torch.Generator().manual_seed(2**31 - 1)
w = torch.randn((27, 27), generator = g, requires_grad = True)

Gradient Descent

In [9]:
epoch = 200
for k in range(1, epoch+1):
    # forward pass
    xenc = torch.nn.functional.one_hot(xs, num_classes = 27).float()
    logits = xenc @ w
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdims = True)
    loss = -probs[torch.arange(xs.shape[0]), ys].log().mean() + 0.01*(w**2).mean()
    
    # backward pass
    w.grad = None
    loss.backward()
    
    # update
    w.data += -50*w.grad
    
    # print
    if (k%50 == 0):
        print(f"epoch = {k}\n----------")

epoch = 50
----------
epoch = 100
----------
epoch = 150
----------
epoch = 200
----------


In [10]:
g = torch.Generator().manual_seed(2**31 - 1)
for i in range(5):
    out = []
    ix = 0
    while True:
        xenc = torch.nn.functional.one_hot(torch.tensor([ix]), num_classes = 27).float()
        logits = xenc @ w
        counts = logits.exp()
        p = counts / counts.sum(1, keepdims = True)
        
        ix = torch.multinomial(p, num_samples = 1, replacement = True, generator = g).item()
        chs = itos[ix]
        out.append(chs)
        if (ix == 0):
            break
    print(''.join(out))

m.
dize.
kaylaic.
jalahoph.
da.


#### Trigram

Populating dataset

In [11]:
xs, ys = [], []
for w in words['Names']:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        ix3 = stoi[ch3]
        xs.append([ix1, ix2])
        ys.append(ix3)

In [12]:
xs

[[0, 22],
 [22, 10],
 [10, 10],
 [10, 17],
 [0, 25],
 [25, 21],
 [21, 9],
 [9, 13],
 [13, 9],
 [9, 17],
 [0, 17],
 [17, 13],
 [13, 17],
 [0, 9],
 [9, 5],
 [5, 17],
 [17, 15],
 [15, 22],
 [22, 21],
 [21, 21],
 [21, 17],
 [0, 5],
 [5, 25],
 [25, 23],
 [23, 2],
 [2, 9],
 [9, 17],
 [0, 26],
 [26, 2],
 [2, 17],
 [17, 20],
 [20, 21],
 [21, 25],
 [25, 18],
 [18, 18],
 [18, 22],
 [0, 10],
 [10, 9],
 [9, 17],
 [0, 17],
 [17, 10],
 [10, 22],
 [22, 21],
 [21, 9],
 [9, 17],
 [0, 2],
 [2, 17],
 [17, 20],
 [20, 23],
 [23, 22],
 [22, 20],
 [0, 22],
 [22, 13],
 [13, 22],
 [22, 21],
 [21, 11],
 [11, 19],
 [0, 17],
 [17, 15],
 [15, 9],
 [9, 12],
 [12, 17],
 [17, 9],
 [9, 21],
 [0, 22],
 [22, 10],
 [10, 9],
 [9, 21],
 [21, 11],
 [0, 22],
 [22, 21],
 [21, 9],
 [9, 4],
 [4, 17],
 [17, 15],
 [15, 22],
 [22, 18],
 [18, 2],
 [0, 10],
 [10, 9],
 [9, 21],
 [21, 17],
 [0, 22],
 [22, 21],
 [21, 21],
 [21, 17],
 [0, 17],
 [17, 13],
 [13, 22],
 [22, 20],
 [20, 11],
 [0, 5],
 [5, 25],
 [25, 6],
 [6, 9],
 [9, 17],
 [