### makemore demo - YouTube follow-along

[Video - The spelled-out intro to language modeling: building makemore](https://www.youtube.com/watch?v=PaCmpygFfXo&list=PLAqhIrjkxbuWI23v9cThsA9GvCAUhRvKZ&index=2)

In [2]:
import torch

# Data set: 32k first names, one name per line.
# Read it through a context manager so the file handle is closed as soon as
# we are done, and pin the encoding so decoding does not depend on the
# platform's default locale.
with open('names.txt', 'r', encoding='utf-8') as names_file:
  words = names_file.read().splitlines()

# Vocabulary: every distinct character that occurs in any name, sorted.
chars = sorted(set(''.join(words)))

# s to i lookup (character -> integer index). Index 0 is reserved for `.`,
# so every character from the data is shifted up by one.
# We use `.` to mark both the start and the end of every word.
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi['.'] = 0

# i to s lookup (integer index -> character), the inverse of stoi.
itos = {i: s for s, i in stoi.items()}

# One row/column per token: every character in `chars` plus the `.` marker
# that lives at index 0. The original demo hard-coded this as 27; deriving it
# keeps the table in step with whatever characters the data actually holds.
num_tokens = len(chars) + 1

# Bigram count table, all zeros to start with.
# N[i, j] counts how many times token j directly follows token i.
N = torch.zeros((num_tokens, num_tokens), dtype=torch.int32)

# Get the counts
for w in words:
  # use `.` to mark the start and end of all words
  chs = ['.'] + list(w) + ['.']
  # zip(chs, chs[1:]) walks over every pair of consecutive characters
  for ch1, ch2 in zip(chs, chs[1:]):
    # integer index of each character in stoi, in 0..num_tokens-1
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    N[ix1, ix2] += 1

# Turn the bigram counts into the parameters of our bigram language model:
# after normalisation each row of P sums to 1.
# N.float() returns a new float tensor, so N itself keeps the raw counts.
P = N.float()

# Sum along dim 1 (across the columns of each row). keepdim=True keeps the
# result as a column of shape (rows, 1) instead of a flat vector.
row_totals = P.sum(dim=1, keepdim=True)

# Broadcasting: P is (rows, cols) and row_totals is (rows, 1). The size-1
# dimension is stretched across every column, so each row of P is divided
# by its own total.
# https://pytorch.org/docs/stable/notes/broadcasting.html?highlight=broadcasting
# `/=` divides in place, which avoids allocating another tensor.
P /= row_totals

# Fixed seed so the sampled names are reproducible from run to run.
g = torch.Generator().manual_seed(2147483647)

# Sample five names from the bigram model.
for _ in range(5):
  out = []
  # Index 0 is `.`, so every name starts from the start-of-word marker.
  ix = 0
  finished = False
  while not finished:
    # Draw the next token from the distribution in the current token's row.
    ix = torch.multinomial(
        P[ix], num_samples=1, replacement=True, generator=g).item()
    out.append(itos[ix])
    # Stop when `.` is drawn, marking the end of the word
    finished = ix == 0

  print(''.join(out))


mor.
axx.
minaymoryles.
kondlaisah.
anchshizarie.
