In [1]:
import numpy as np
import matplotlib.pylab as plt
import torch

In [2]:
with open("names.txt", "r") as f:
    names = f.read().splitlines()
# names = names[:5]
# names

In [3]:
names = ['.' +name + '.' for name in names]
# names

In [5]:
from collections import defaultdict

counts = defaultdict(int)
for name in names:
    bi_gram = [i+j for i, j in zip(name, name[1:])]
    for ch1, ch2 in zip(bi_gram, name[2:]):
        # print(f'{ch1=} {ch2=}')
        tri_gram = (ch1, ch2)
        counts[tri_gram] += 1

In [6]:
sorted(counts.items(), key=lambda item: item[1], reverse=True)[:10]

[(('ah', '.'), 1714),
 (('na', '.'), 1673),
 (('an', '.'), 1509),
 (('on', '.'), 1503),
 (('.m', 'a'), 1453),
 (('.j', 'a'), 1255),
 (('.k', 'a'), 1254),
 (('en', '.'), 1217),
 (('ly', 'n'), 976),
 (('yn', '.'), 953)]

In [7]:
bigrams, onegrams = zip(*counts.keys())
dictionary_col = set(bigrams)
dictionary_row = set(onegrams)

In [8]:
map_int_to_char_row = {i: ch for i, ch in enumerate(dictionary_row)}
map_int_to_char_col = {i: ch for i, ch in enumerate(dictionary_col)}
map_char_to_int_row ={ch:i for i, ch in map_int_to_char_row.items()}
map_char_to_int_col ={ch:i for i, ch in map_int_to_char_col.items()}


In [9]:
counts_vect = torch.zeros((len(dictionary_col), len(dictionary_row)))

for name in names:
    bi_gram = [i+j for i, j in zip(name, name[1:])]
    for ch1, ch2 in zip(bi_gram, name[2:]):
        i_ch1, i_ch2 = map_char_to_int_col[ch1], map_char_to_int_row[ch2]
        counts_vect[i_ch1, i_ch2] += 1
counts_vect

tensor([[ 0.,  3., 51.,  ...,  8.,  0.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
        [ 0.,  1.,  1.,  ...,  0.,  1.,  0.],
        ...,
        [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
        [ 0.,  6., 20.,  ...,  1., 13.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  0.,  0.]])

In [15]:
# smoothing
counts_vect+=5

In [16]:
# normalization by row
counts_vect /= counts_vect.sum(1, keepdim=True)
counts_vect

tensor([[0.0368, 0.0368, 0.0373,  ..., 0.0369, 0.0368, 0.0368],
        [0.0370, 0.0370, 0.0370,  ..., 0.0370, 0.0370, 0.0370],
        [0.0369, 0.0370, 0.0370,  ..., 0.0369, 0.0370, 0.0369],
        ...,
        [0.0370, 0.0370, 0.0370,  ..., 0.0370, 0.0370, 0.0370],
        [0.0368, 0.0370, 0.0374,  ..., 0.0368, 0.0372, 0.0368],
        [0.0370, 0.0370, 0.0370,  ..., 0.0370, 0.0370, 0.0370]])

In [17]:
counts_vect.shape

torch.Size([601, 27])

In [40]:
import random 

staring_inexes = [map_char_to_int_col[bigram] for bigram in dictionary_col if bigram[0]=='.']
starting_char = random.choices(staring_inexes)[0]

word = [starting_char]
while True:
    p = counts_vect[word[-1]]
    samples = torch.multinomial(p, 1, replacement=True).item()
    
    word.append(samples)
    if map_int_to_char_row[word[-1]][-1] == '.':
        break

final = "".join([map_int_to_char_row[w] for w in word[1:]])
final = map_int_to_char_col[starting_char] + final
print(final)
    

.udcxoarkqxfnozjgzcxuxwbbuhkyqrkqhfhhumzvyzdiavtwgmyva.


In [41]:
log_likelihood = 0.0
n = 0

for name in names:
    bi_gram = [i+j for i, j in zip(name, name[1:])]
    for ch1, ch2 in zip(bi_gram, name[2:]):
        i_ch1, i_ch2 = map_char_to_int_col[ch1], map_char_to_int_row[ch2]
        prob = counts_vect[i_ch1, i_ch2]
        logprob = torch.log(prob)
        log_likelihood += logprob
        n += 1
        #print(f'{ch1}{ch2}: {prob:.4f} {logprob:.4f}')

print(f'{log_likelihood=}')
nll = -log_likelihood
print(f'{nll=}')
print(f'{nll/n}')

log_likelihood=tensor(-640606.1875)
nll=tensor(640606.1875)
3.2665157318115234


# Neural Networks

In [42]:
with open("names.txt", "r") as f:
    names = f.read().splitlines()

In [43]:
names = ['.' +name + '.' for name in names]

In [44]:
xs, ys, = [], []

for name in names:
    bi_gram = [i+j for i, j in zip(name, name[1:])]
    for ch1, ch2 in zip(bi_gram, name[2:]):
        i1 = map_char_to_int_col[ch1]
        i2 = map_char_to_int_row[ch2]
        xs.append(i1)
        ys.append(i2)

len_xs = len(set(xs))
len_ys = len(set(ys))
print(f'{len_xs=}')
print(f'{len_ys=}')
xs = torch.tensor(xs)
ys = torch.tensor(ys)

len_xs=601
len_ys=27


In [45]:
W = torch.randn(601, 27, requires_grad=True)

In [46]:
import torch.nn.functional as F 
xenc = F.one_hot(xs, num_classes=601).float()
xenc 

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [47]:
logits = xenc @ W

In [48]:
logits.shape

torch.Size([196113, 27])

In [49]:
counts = logits.exp()
probs = counts/ counts.sum(1,keepdim=True)

In [50]:
print(probs[0, ys[0]])
print(probs[1, ys[1]])

tensor(0.0121, grad_fn=<SelectBackward0>)
tensor(0.0189, grad_fn=<SelectBackward0>)


In [51]:
- probs[0, ys[0]].log()

tensor(4.4183, grad_fn=<NegBackward0>)

In [52]:
- torch.tensor([0, 0.00001, 0.8, 0.999, 5]).log()
# dla bardzo maych wartości daje bardzo duże wartości, a my chcialibyśmy żeby prawdopodobieństwo 0.0001 zamieniło sie w 1,
# ale potrafimy tylko minimalizować dla tego zamieniamy bardzo małe liczby na bardzo duże i wtedy staramy się je minimalizować

tensor([        inf,  1.1513e+01,  2.2314e-01,  1.0005e-03, -1.6094e+00])

In [53]:
# loss is avarge negative likehood 

In [54]:
loss = -probs[torch.arange(196113), ys].log().mean()
loss

tensor(3.7356, grad_fn=<NegBackward0>)

# final NN

In [55]:
with open("names.txt", "r") as f:
    names = f.read().splitlines()
names = ['.' +name + '.' for name in names]

xs, ys, = [], []

for name in names:
    bi_gram = [i+j for i, j in zip(name, name[1:])]
    for ch1, ch2 in zip(bi_gram, name[2:]):
        i1 = map_char_to_int_col[ch1]
        i2 = map_char_to_int_row[ch2]
        xs.append(i1)
        ys.append(i2)

len_xs = len(set(xs))
len_ys = len(set(ys))
print(f'{len_xs=}')
print(f'{len_ys=}')
xs = torch.tensor(xs)
ys = torch.tensor(ys)

len_xs=601
len_ys=27


In [56]:
import torch.nn.functional as F 

W = torch.randn(601, 27, requires_grad=True)

for i in range(100):
    xenc = F.one_hot(xs, num_classes=601).float()
    logits = xenc @ W
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdim=True)

    loss = -probs[torch.arange(196113), ys].log().mean()
    if i%10==0: print(f'{loss.item()=}')
    W.grad = None
    loss.backward()

    W.data += -100 * W.grad  


loss.item()=3.812889337539673
loss.item()=2.9199628829956055
loss.item()=2.657243490219116
loss.item()=2.5285897254943848
loss.item()=2.4491477012634277
loss.item()=2.3940205574035645
loss.item()=2.3532018661499023
loss.item()=2.3217203617095947
loss.item()=2.29670786857605
loss.item()=2.2763495445251465


In [57]:
# W and counts_vect should be the same
W

tensor([[-0.0510, -0.0433,  0.8925,  ..., -0.6487,  0.3182, -0.6579],
        [-1.7876,  0.1065, -1.2005,  ..., -0.3005, -0.9137, -0.9794],
        [-1.8114, -0.7504, -1.5938,  ...,  0.5628, -0.7114,  1.2853],
        ...,
        [-2.0834, -0.3072,  0.0968,  ...,  1.1767, -0.5997,  1.2214],
        [ 0.3543, -0.3091,  1.6474,  ...,  0.4291,  0.3654,  0.5066],
        [ 0.6297, -1.3987,  0.4014,  ...,  0.4010,  0.5598,  0.3477]],
       requires_grad=True)

In [58]:
counts_vect

tensor([[0.0368, 0.0368, 0.0373,  ..., 0.0369, 0.0368, 0.0368],
        [0.0370, 0.0370, 0.0370,  ..., 0.0370, 0.0370, 0.0370],
        [0.0369, 0.0370, 0.0370,  ..., 0.0369, 0.0370, 0.0369],
        ...,
        [0.0370, 0.0370, 0.0370,  ..., 0.0370, 0.0370, 0.0370],
        [0.0368, 0.0370, 0.0374,  ..., 0.0368, 0.0372, 0.0368],
        [0.0370, 0.0370, 0.0370,  ..., 0.0370, 0.0370, 0.0370]])

In [59]:
staring_inexes = [map_char_to_int_col[bigram] for bigram in dictionary_col if bigram[0]=='.']
starting_char = random.choices(staring_inexes)[0]

word = [starting_char]

while True:
    xenc = F.one_hot(torch.tensor(word[-1]), num_classes=601).float()
    logits = xenc @ W
    counts = logits.exp()
    # print(counts.sum(1, keepdim=True))
    p = counts / counts.sum()

    samples = torch.multinomial(p, 1, replacement=True)
    word.append(samples.item())
    # print(f'{map_int_to_char_row[samples.item()][-1]=}')
    # print(f'{word}')
    if map_int_to_char_row[word[-1]][-1] == '.':
        break
word = "".join([map_int_to_char_row[w] for w in word[1:]])
word = map_int_to_char_col[starting_char] + word
print(word)


.hjaqohldmaejlsptnbbdssgm.


random world

In [60]:
staring_inexes = [map_char_to_int_col[bigram] for bigram in dictionary_col if bigram[0]=='.']
starting_char = random.choices(staring_inexes)[0]

word = [starting_char]

while True:
    xenc = F.one_hot(torch.tensor(word[-1]), num_classes=601).float()
    logits = xenc @ W
    counts = logits.exp()
    # print(counts.sum(1, keepdim=True))
    p = counts / counts.sum()

    samples = torch.randint(27, size=(1,))
    word.append(samples.item())
    # print(f'{map_int_to_char_row[samples.item()][-1]=}')
    # print(f'{word}')
    if map_int_to_char_row[word[-1]][-1] == '.':
        break
    