In [1]:
!pip install matplotlib



In [2]:
# Read the dataset, one entry per line; the context manager closes the file
# handle as soon as the contents have been read.
with open('names.txt', 'r') as f:
    words = f.read().splitlines()

In [3]:
# Peek at the first ten entries of the dataset.
words[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

In [4]:
# Number of entries read from the file.
len(words)

32033

In [5]:
# Length of the shortest entry.
min(map(len, words))

2

In [6]:
# Length of the longest entry.
max(map(len, words))

15

### Bigram language model

In [7]:
# Predict the next character from the current character alone.

b = {}  # maps each (current, next) character pair to how often it occurs

for word in words:
    # '.' is added on both sides so that starts and ends of words are
    # counted as bigrams too.
    symbols = ['.', *word, '.']
    for pair in zip(symbols, symbols[1:]):
        b[pair] = b.get(pair, 0) + 1

In [8]:
# Uncomment to list every bigram with its count, most frequent first.
#sorted(b.items(), key=lambda kv:-kv[1])

In [9]:
import torch

In [10]:
# Integer count matrix, later indexed as N[first_char, second_char].
# Sized for 27 symbols (assumed: 26 letters plus the '.' boundary marker).
N = torch.zeros(27, 27, dtype=torch.int32)

In [11]:
# Vocabulary: '.' first (so it gets index 0), then every distinct character
# found in the dataset in sorted order.
chars = ['.', *sorted(set(''.join(words)))]
# Character -> integer index lookup.
stoi = dict(zip(chars, range(len(chars))))

In [12]:
# Integer index -> character lookup (inverse of stoi).
itos = dict(enumerate(chars))

In [13]:
# Fill N with bigram counts: N[i, j] is the number of times character j
# follows character i, with '.' standing for word start/end.
# Reset first so that re-running this cell does not double the counts.
N.zero_()

for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        N[ix1, ix2] += 1

In [None]:
import matplotlib.pyplot as plt

# Draw the count matrix as a heatmap and annotate every cell with its
# bigram (above the cell centre) and its count (below the cell centre).
plt.figure(figsize=(16, 16))
plt.imshow(N, cmap='Blues')
for row in range(27):
    for col in range(27):
        label = itos[row] + itos[col]
        # imshow puts the column index on x and the row index on y
        plt.text(col, row, label, ha="center", va="bottom", color='gray')
        plt.text(col, row, N[row, col].item(), ha="center", va="top", color='gray')
plt.axis('off');

In [None]:
# Row 0 holds the counts of bigrams whose first character is '.',
# i.e. how often each character starts a word.
N[0]

In [None]:
# Turn the first-character counts into a probability distribution.
p = N[0].float()   # fresh float copy, so the in-place divide leaves N untouched
p /= p.sum()
p

In [None]:
# Dedicated, explicitly seeded random generator so sampling is repeatable.
g = torch.Generator()
g.manual_seed(2147483647)

In [None]:
# Draw a single character index from p and show the matching character.
drawn = torch.multinomial(p, num_samples=1, replacement=True, generator=g)
ix = drawn.item()
itos[ix]

In [None]:
# Row-normalised counts: P[i, j] is the probability that character j follows i.
P = N.float()
# keepdim=True keeps the row sums as a (27, 1) column so the division
# broadcasts across each row rather than across columns.
P /= P.sum(dim=1, keepdim=True)

In [None]:
# Sample 20 words from the bigram model.
g = torch.Generator().manual_seed(2147483647)

for i in range(20):

    ix = 0  # index 0 is '.', so every sample starts at the word boundary
    out = []
    while True:
        # Use the pre-normalised row of P instead of re-normalising N[ix]
        # on every step.
        p = P[ix]
        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
        out.append(itos[ix])
        if ix == 0:
            # drew '.', the end-of-word marker (it is kept in the output)
            break
    print(''.join(out))

In [None]:
# Average negative log-likelihood of every bigram in the dataset under P.
log_likelihood = 0
n = 0  # number of bigrams seen
for word in words:
    symbols = ['.', *word, '.']
    for cur, nxt in zip(symbols, symbols[1:]):
        log_likelihood += torch.log(P[stoi[cur], stoi[nxt]])
        n += 1

print(-log_likelihood/n)

In [None]:
# Build (input, target) index pairs from the bigrams of the first word only,
# printing each bigram as it is added.
xs, ys = [], []

for word in words[:1]:
    symbols = ['.', *word, '.']
    for cur, nxt in zip(symbols, symbols[1:]):
        src, dst = stoi[cur], stoi[nxt]
        print(cur, nxt)
        xs.append(src)
        ys.append(dst)

# Convert the Python lists to integer tensors.
xs = torch.tensor(xs)
ys = torch.tensor(ys)


In [None]:
# Input indices: the first character of each bigram.
xs

In [None]:
# Target indices: the second character of each bigram.
ys

In [None]:
import torch.nn.functional as F
# One-hot encode the inputs (27 columns) and cast to float32 so the result
# can be matrix-multiplied with float weights.
xenc = F.one_hot(xs, 27).to(torch.float32)

In [None]:
# One row per input example, 27 columns (one per class).
xenc.shape

In [None]:
# Visualise the one-hot matrix: one row per input example.
plt.imshow(xenc)

In [None]:
# Check the dtype of the encoded inputs after the float cast.
xenc.dtype

In [None]:
# Single linear layer without bias: (5, 27) @ (27, 27) -> (5, 27)

# Seed the initialisation so this cell gives the same numbers on every run.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g)
logits = xenc @ W  # interpreted as log-counts
counts = logits.exp()  # strictly positive, playing the role of the counts in N
probs = counts / counts.sum(1, keepdim=True)  # normalise each row to sum to 1
# exponentiating and row-normalising is exactly the softmax

print(probs)

In [None]:
# Walk through every training example and report the negative log-likelihood
# that the network assigns to its correct next character.
num_examples = len(xs)  # derived from the data instead of hard-coding 5

nlls = torch.zeros(num_examples)
for i in range(num_examples):
    # i-th bigram:
    x = xs[i].item() # input character index
    y = ys[i].item() # label character index
    print('--------')
    print(f'bigram example {i+1}: {itos[x]}{itos[y]} (indexes {x},{y})')
    print('input to the neural net:', x)
    print('output probabilities from the neural net:', probs[i])
    print('label (actual next character):', y)
    p = probs[i, y]  # probability the net gives to the true label
    print('probability assigned by the net to the the correct character:', p.item())
    logp = torch.log(p)
    print('log likelihood:', logp.item())
    nll = -logp
    print('negative log likelihood:', nll.item())
    nlls[i] = nll

print('=========')
print('average negative log likelihood, i.e. loss =', nlls.mean().item())

In [None]:
#------------From Scratch!!!------------------------

In [None]:
# Build the full training set: one (input, target) index pair per bigram
# across every word.
xs, ys = [], []

for word in words:
    symbols = ['.', *word, '.']
    for cur, nxt in zip(symbols, symbols[1:]):
        src, dst = stoi[cur], stoi[nxt]
        xs.append(src)
        ys.append(dst)

# Convert the Python lists to integer tensors.
xs = torch.tensor(xs)
ys = torch.tensor(ys)
        

In [None]:
#Encoding the inputs and the targets :
import torch.nn.functional as F

In [None]:
# One-hot encode the inputs. num_classes is given explicitly so the width
# always matches the 27x27 weight matrix instead of being inferred from the
# largest index that happens to occur in xs.
xenc = F.one_hot(xs, num_classes=27).float()
xenc.shape

In [None]:
# One-layer neural net.
# W holds the logits (log-counts) for every bigram; they only become
# probabilities after the softmax in the forward pass.
# The generator is seeded so that training starts from the same weights on
# every run.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn(size=(27,27), generator=g, requires_grad=True)

In [None]:
# Gradient descent on the average negative log-likelihood of the targets.
learning_rate = 10

for k in range(100):
    # forward pass: softmax over the logits selected by the one-hot inputs
    logits = xenc @ W
    counts = logits.exp()
    probs = counts/counts.sum(1, keepdim=True)
    # pick out, for each example, the probability given to its true target
    loss = -probs[torch.arange(xenc.shape[0]), ys].log().mean()
    print(loss.item())

    # backward pass; clearing the gradient first stops it accumulating
    # across iterations
    W.grad = None
    loss.backward()

    # parameter update, done under no_grad rather than through the legacy
    # `.data` attribute so autograd does not record it
    with torch.no_grad():
        W -= learning_rate * W.grad