# **Download Data**

In [127]:
!wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

--2024-02-18 15:04:30--  https://raw.githubusercontent.com/karpathy/makemore/master/names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228145 (223K) [text/plain]
Saving to: ‘names.txt.6’


2024-02-18 15:04:30 (8.62 MB/s) - ‘names.txt.6’ saved [228145/228145]



# **Read names**

In [128]:
words = open('names.txt', 'r').read().splitlines()

# **Show first 15 names**

In [129]:
words[:15]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn',
 'abigail',
 'emily',
 'elizabeth',
 'mila',
 'ella']

# **Initialize 3D Array**

In [130]:
import torch

In [131]:
N = torch.zeros((27, 27, 27), dtype=torch.int32)

In [132]:
chars = sorted(list(set(''.join(words))))

In [133]:
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}

In [134]:
stoi

{'a': 1,
 'b': 2,
 'c': 3,
 'd': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 't': 20,
 'u': 21,
 'v': 22,
 'w': 23,
 'x': 24,
 'y': 25,
 'z': 26,
 '.': 0}

In [135]:
itos

{1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 0: '.'}



# **Building Tri-Gram Model**

In [136]:
for w in words:
  chs = ['.', '.'] + list(w) + ['.']
  for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    ix3 = stoi[ch3]
    N[ix1, ix2, ix3] += 1

# **Compute Probs**

In [137]:
# Smoothing the counts
P = (N+1).float()
P /= P.sum(dim=2, keepdims=True)

In [138]:
P /= P.sum(dim=2, keepdims=True)

# **Generate 50 new names**

In [139]:
g = torch.Generator().manual_seed(2147483647)

for i in range(50):
  out = []
  ix1 = 0
  ix2 = 0
  while True:
    p = P[ix1, ix2].float()
    ix3 = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
    out.append(itos[ix3])
    ix1, ix2 = ix2, ix3
    if ix3 == 0:
      break
  print(''.join(out))

junide.
jakasid.
prelay.
adin.
kairritoper.
sathen.
sameia.
yanileniassibduinrwin.
lessiyanayla.
te.
farmumthyfortumj.
ponn.
lena.
jaylicore.
ya.
jocken.
jamilyn.
kmikim.
yah.
sanaasnhavi.
monszxhddion.
mathani.
zie.
paun.
ty.
tin.
sreli.
ish.
dyn.
rumel.
jemah.
dawata.
kha.
cra.
raydnh.
adorta.
malyn.
brey.
aur.
lavarocbzthemiraya.
ath.
basely.
tavisotten.
salee.
marlen.
em.
fabethellianten.
chan.
jazaodridyden.
jhaliypvrgia.


# **Evaluate Model**

In [140]:
import math
# Calculating perplexity for the first 15 words
log_likelihood = 0.0
n = 0
for w in words[:15]:
  chs = ['.', '.'] + list(w) + ['.']
  for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    ix3 = stoi[ch3]
    prob = P[ix1, ix2, ix3]
    logprob = torch.log(prob)
    log_likelihood += logprob
    n += 1

print(f'{log_likelihood=}')
nll = -log_likelihood
print(f'{nll=}')
print(f'{nll/n}')
ppl = math.exp(nll/n)
print(f'{ppl=}')

log_likelihood=tensor(-203.7296)
nll=tensor(203.7296)
2.017124891281128
ppl=7.516682556920932



*   The GOAL is to maximize the likelihood of the data w.r.t model parameters
(here, statisitcal modeling)

*   Equivalent to maximizing the log likelihood (because the log is a monotonic function)   

*   Equivalent to minimizing the negative log likelihood

*   Equivalent to minimizing the average negative log likelihood

# **Evaluating my name "Saad"**

In [141]:
log_likelihood = 0.0
n = 0

for w in ["saad"]:
  chs = ['.'] + ['.'] + list(w) + ['.'] + ['.']
  for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    ix3 = stoi[ch3]
    prob = P[ix1, ix2, ix3]
    logprob = torch.log(prob)
    log_likelihood += logprob
    n += 1
    print(f'{ch1}{ch2}{ch3}: {prob:.4f}: {logprob:.4f}')

print(f'{log_likelihood=}')
nll = -log_likelihood
print(f'{nll=}')
print(f'{nll/n}')

..s: 0.0641: -2.7468
.sa: 0.2863: -1.2508
saa: 0.0350: -3.3519
aad: 0.0823: -2.4970
ad.: 0.1001: -2.3017
d..: 0.0370: -3.2958
log_likelihood=tensor(-15.4441)
nll=tensor(15.4441)
2.5740175247192383
