<a href="https://colab.research.google.com/github/RNavs-44/Neural_Networks/blob/main/makemore_mlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn.functional as F
import random
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

In [3]:
!wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

--2024-03-16 12:57:43--  https://raw.githubusercontent.com/karpathy/makemore/master/names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228145 (223K) [text/plain]
Saving to: ‘names.txt.1’


2024-03-16 12:57:43 (12.5 MB/s) - ‘names.txt.1’ saved [228145/228145]



In [4]:
words = open('names.txt', 'r').read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [5]:
len(words)

32033

In [16]:
# build vocabulary of characters and mapping to / from integers
chars = sorted(list(set(''.join(words))))
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()}
vocab_size = len(itos)
print(itos)
print(vocab_size)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}
27


In [31]:
# build the dataset
block_size = 3 # context length

def build_dataset(words):
  x, y = [], []
  for w in words:
    context = [0] * block_size
    for ch in w + '.':
      ix = stoi[ch]
      x.append(context)
      y.append(ix)
      context = context[1:] + [ix] # crop and append

  x = torch.tensor(x)
  y = torch.tensor(y)

  return x, y

random.seed(42)
random.shuffle(words)
n1 = int(0.8 * len(words))
n2 = int(0.9 * len(words))

xtr, ytr = build_dataset(words[:n1])
xdev, ydev = build_dataset(words[n1:n2])
xts, yts = build_dataset(words[n2:])

182544

In [52]:
n_embd = 10
n_hidden = 200

g = torch.Generator().manual_seed(2147483647)
c = torch.randn((vocab_size, n_embd))
w1 = torch.randn((n_embd * block_size, n_hidden))
b1 = torch.randn(n_hidden)
w2 = torch.randn((n_hidden, vocab_size))
b2 = torch.randn(vocab_size)
parameters = [c, w1, b1, w2, b2]

print(sum(p.nelement() for p in parameters)) # number of parameters in total

for p in parameters:
  p.requires_grad = True

11897


In [53]:
max_steps = 200000
batch_size = 32
lossi = []

for i in range(max_steps):
  # minibatch construction
  ix = torch.randint(0, xtr.shape[0], (batch_size, )) # randint(low, hight, size)
  xb, yb = xtr[ix], ytr[ix]

  # forward pass
  emb = c[xb]
  embcat = emb.view(emb.shape[0], -1)
  hpreact = embcat @ w1 + b1
  h = torch.tanh(hpreact)
  logits = h @ w2 + b2
  loss = F.cross_entropy(logits, yb)

  # backward pass
  for p in parameters:
    p.grad = None
  loss.backward()

  # update
  lr = 0.1 if i < 100000 else 0.01
  for p in parameters:
    p.data += -lr * p.grad

  # track stats
  if i % 10000 == 0:
    print(f'{i:7d} / {max_steps:7d}: {loss.item()}')
  lossi.append(loss.log10().item())

print(loss.item())


      0 /  200000: 29.526050567626953
  10000 /  200000: 2.6376473903656006
  20000 /  200000: 2.377519130706787
  30000 /  200000: 2.168323278427124
  40000 /  200000: 2.506762742996216
  50000 /  200000: 2.557873487472534
  60000 /  200000: 2.112635850906372
  70000 /  200000: 1.9628098011016846
  80000 /  200000: 2.147214651107788
  90000 /  200000: 2.515380620956421
 100000 /  200000: 2.3908863067626953
 110000 /  200000: 1.855699896812439
 120000 /  200000: 2.208400011062622
 130000 /  200000: 2.8871803283691406
 140000 /  200000: 2.1946489810943604
 150000 /  200000: 2.048001766204834
 160000 /  200000: 2.326359748840332
 170000 /  200000: 2.1858012676239014
 180000 /  200000: 2.0613818168640137
 190000 /  200000: 2.0051865577697754
2.2710604667663574


In [54]:
@torch.no_grad()
def split_loss(split):
  x, y = {
      'train': (xtr, ytr),
      'val': (xdev, ydev),
      'test': (xts, yts),
  }[split]
  emb = c[x]
  embcat = emb.view(emb.shape[0], -1)
  hpreact = embcat @ w1 + b1
  h = torch.tanh(hpreact)
  logits = h @ w2 + b2
  loss = F.cross_entropy(logits, y)
  print(split, loss.item())

split_loss('train')
split_loss('val')

train 2.124711275100708
val 2.1641905307769775


In [55]:
# sample from model
for _ in range(20):
  out = []
  context = [0] * block_size
  while True:
    # forward pass neural net
    emb = c[torch.tensor(context)]
    embcat = emb.view(1, -1)
    hpreact = embcat @ w1 + b1
    h = torch.tanh(hpreact)
    logits = h @ w2 + b2
    probs = F.softmax(logits, dim = 1)

    # sample from distribution
    ix = torch.multinomial(probs, num_samples=1, generator=g).item()

    # shift context window and track samples
    context = context[1:] + [ix]
    out.append(ix)

    # if we sample special '.' token break
    if ix == 0:
      break
  print(''.join(itos[i] for i in out)) # print generated word

junide.
janaque.
pres.
amaina.
kailritolemarayeem.
sameaa.
zamilenias.
dedaine.
imer.
sna.
inayla.
terinleumarifoltamj.
ponne.
mah.
jaylinora.
yae.
ocamy.
jamilynn.
miki.
wynn.
