In [1]:
import numpy as np
import pandas as pd
import requests as rqst
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [2]:
# Load the dataset: one name per line.
# The context manager closes the file handle as soon as the text has been
# read, instead of leaving an open handle behind in the kernel.
with open('names.txt') as names_file:
    words = names_file.read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [3]:
# Vocabulary: every distinct character that occurs in any name, sorted.
chars = sorted({ch for word in words for ch in word})
chars

['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [4]:
# Character <-> integer lookup tables.
# Letters are numbered from 1 so that index 0 stays free for the '.' marker.
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
# Invert the mapping so indices can be decoded back into characters.
itos = {index: symbol for symbol, index in stoi.items()}
itos

{1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 0: '.'}

In [5]:
# Build (context -> next character) training pairs from the first five names.
block_size = 3  # how many preceding characters are used to predict the next one
inputs, targets = [], []
for name in words[:5]:
    print(name)
    window = [0] * block_size  # every name starts from an all-'.' context
    # The trailing "." makes the end of the name a prediction target too.
    for target in (stoi[ch] for ch in name + "."):
        inputs.append(window)
        targets.append(target)
        print(''.join(itos[i] for i in window), '---->', itos[target])
        # Slide the window: drop the oldest index, append the newest one.
        # A new list is created each time, so stored contexts are never mutated.
        window = window[1:] + [target]
X = torch.tensor(inputs)
Y = torch.tensor(targets)

emma
... ----> e
..e ----> m
.em ----> m
emm ----> a
mma ----> .
olivia
... ----> o
..o ----> l
.ol ----> i
oli ----> v
liv ----> i
ivi ----> a
via ----> .
ava
... ----> a
..a ----> v
.av ----> a
ava ----> .
isabella
... ----> i
..i ----> s
.is ----> a
isa ----> b
sab ----> e
abe ----> l
bel ----> l
ell ----> a
lla ----> .
sophia
... ----> s
..s ----> o
.so ----> p
sop ----> h
oph ----> i
phi ----> a
hia ----> .


In [7]:
# Peek at the first two contexts and their targets.
print(f" samples of X: {X[:2]}")
print(f" samples of Y: {Y[:2]}")
# Cell result: shape and dtype of the inputs, then of the targets.
(
    X.shape, X.dtype,
    Y.shape, Y.dtype,
)  # type: ignore

 samples of X: tensor([[0, 0, 0],
        [0, 0, 5]])
 samples of Y: tensor([ 5, 13])


(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

## Embedding into a smaller dimension


In [8]:

# Embedding lookup table: one 2-D vector for each of the 27 symbols
# (26 letters plus '.').
# A dedicated, seeded generator makes the values reproducible under
# Restart & Run All without touching the global RNG state.
C = torch.randn((27, 2), generator=torch.Generator().manual_seed(2147483647))
# Embedding a single integer is just a row lookup.
C[5]

tensor([-1.2375,  0.1140])

In [9]:
# The same lookup expressed with one-hot encoding: a one-hot vector times C
# picks out exactly one row of C.
emb_five = F.one_hot(torch.tensor(5), num_classes=27)
# F.one_hot returns torch.int64 (long), so cast to float before multiplying
# with the float table C.
# Check the claim instead of eyeballing it: the product must equal C[5].
assert torch.allclose(emb_five.float() @ C, C[5])
emb_five.float() @ C

tensor([-1.2375,  0.1140])

## Conclusion

---

The result is the same as plain indexing, because multiplying a one-hot vector by `C` keeps only the row of `C` where the vector is 1; every other row is multiplied by 0.


In [10]:
# PyTorch indexing accepts whole integer tensors (or lists), not just scalars:
# C[X] looks up a row of C for every entry of X in a single operation, so the
# result has X's shape with the embedding dimension appended.
emb = C[X]
print(emb.shape)
emb

# emb[0] is the embedding of the first context [0, 0, 0] ("..."):
# the same row C[0] repeated three times.

torch.Size([32, 3, 2])


tensor([[[-0.3491, -0.6884],
         [-0.3491, -0.6884],
         [-0.3491, -0.6884]],

        [[-0.3491, -0.6884],
         [-0.3491, -0.6884],
         [-1.2375,  0.1140]],

        [[-0.3491, -0.6884],
         [-1.2375,  0.1140],
         [-3.2033, -0.3469]],

        [[-1.2375,  0.1140],
         [-3.2033, -0.3469],
         [-3.2033, -0.3469]],

        [[-3.2033, -0.3469],
         [-3.2033, -0.3469],
         [-0.1400,  0.5173]],

        [[-0.3491, -0.6884],
         [-0.3491, -0.6884],
         [-0.3491, -0.6884]],

        [[-0.3491, -0.6884],
         [-0.3491, -0.6884],
         [-1.9791, -0.4745]],

        [[-0.3491, -0.6884],
         [-1.9791, -0.4745],
         [-0.6213,  2.7754]],

        [[-1.9791, -0.4745],
         [-0.6213,  2.7754],
         [-0.3472, -0.4749]],

        [[-0.6213,  2.7754],
         [-0.3472, -0.4749],
         [-0.1199,  0.7997]],

        [[-0.3472, -0.4749],
         [-0.1199,  0.7997],
         [-0.3472, -0.4749]],

        [[-0.1199,  0

In [None]:
# The first context is all padding, so its first index is 0 ('.') ...
print(X[0, 0])  # type: ignore
# ... and its embedding is the row C[0] repeated for each context position.
emb[0]

tensor(0)


tensor([[-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884]])

# The hidden layer


In [12]:
# Hidden-layer parameters.
# W1 has 6 input rows because each context holds 3 characters with a 2-D
# embedding each (3 * 2 = 6); 100 is the number of neurons in the layer.
# Both tensors draw from one seeded generator so the initialisation is
# reproducible under Restart & Run All and W1 and b use different random values.
g_hidden = torch.Generator().manual_seed(42)
W1 = torch.randn((6, 100), generator=g_hidden)
# A 1-D bias of shape (100,) is enough: broadcasting aligns trailing
# dimensions, so it is added to every row of an (N, 100) activation exactly as
# a (1, 100) bias would be.
b = torch.randn(100, generator=g_hidden)
b

tensor([-1.6626,  1.1382,  0.4414, -0.9961,  1.1724,  0.5139, -1.6073,  1.8050,
        -0.7537,  0.3273, -1.3357,  1.4887,  0.4429, -0.4079,  1.2124, -0.7905,
        -0.4999, -2.0080, -0.0712,  0.2288,  1.0121,  0.4022,  1.5563, -0.1665,
         0.9227, -0.6304,  1.6090,  0.1770, -0.4674, -0.3271,  0.5235,  0.2529,
         0.2663, -0.0508, -1.1151, -2.8558, -0.1888, -0.1678,  0.6549, -2.0097,
         0.5369,  2.3578, -0.5600, -0.0773,  1.2689, -0.0460, -0.7059, -1.7549,
        -2.1820, -1.3235, -1.9751, -0.4566, -0.9876, -0.3990,  1.2888, -0.7201,
        -1.4283, -0.3556, -0.4116, -0.8644,  1.5038,  1.0587, -2.0885,  0.2623,
        -0.2587, -0.4344,  1.1998,  0.6714,  0.0307, -0.7529,  1.4484, -1.5830,
         0.7927, -0.3378,  0.0556, -0.3708, -0.1410, -1.3243, -0.0030, -0.8132,
        -0.6422,  1.1896,  2.7139, -0.7196,  0.8066, -1.3035, -1.4769, -1.2684,
         0.0874, -2.0976, -1.4507,  1.1109,  1.8382, -0.2526, -0.0689, -0.6452,
        -0.2320, -1.1050,  1.9809,  1.02

In [13]:
# emb is (32, 3, 2) but W1 is (6, 100), so emb cannot be multiplied with W1
# directly: the three 2-D embeddings of each context first have to be laid
# side by side into 6 features per example.

# Option 1: slice out each context position and concatenate the slices.
# This is the slice for position 0 (the first character of every context).
print(emb[:, 0])

emb[:, 0].shape

tensor([[-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-1.2375,  0.1140],
        [-3.2033, -0.3469],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-1.9791, -0.4745],
        [-0.6213,  2.7754],
        [-0.3472, -0.4749],
        [-0.1199,  0.7997],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.1400,  0.5173],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.3472, -0.4749],
        [ 1.3498, -0.0150],
        [-0.1400,  0.5173],
        [-0.0414, -1.1877],
        [-1.2375,  0.1140],
        [-0.6213,  2.7754],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [-0.3491, -0.6884],
        [ 1.3498, -0.0150],
        [-1.9791, -0.4745],
        [-1.1382,  1.0663],
        [-0.2293,  1.6071]])


torch.Size([32, 2])