In [1]:
import numpy as np
import pandas as pd
import requests as rqst
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [2]:
words = open('names.txt').read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [8]:
chars = sorted(list(set(''.join(words))))
chars

['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [9]:
# build index look up table
stoi = {char: i + 1 for i, char in enumerate(chars)}
stoi['.'] = 0
itos = {i : char for char, i in stoi.items()}
itos

{1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 0: '.'}

In [12]:
block_size = 3
X, Y = [], []
for word in words[:5]:
    print(word)
    context = [0]*block_size
    for char in word + ".":
        ix = stoi[char]
        X.append(context)
        Y.append(ix)
        print(''.join(itos[i] for i in context), '---->', itos[ix])
        context = context[1:] + [ix]
X = torch.tensor(X)
Y = torch.tensor(Y)

emma
... ----> e
..e ----> m
.em ----> m
emm ----> a
mma ----> .
olivia
... ----> o
..o ----> l
.ol ----> i
oli ----> v
liv ----> i
ivi ----> a
via ----> .
ava
... ----> a
..a ----> v
.av ----> a
ava ----> .
isabella
... ----> i
..i ----> s
.is ----> a
isa ----> b
sab ----> e
abe ----> l
bel ----> l
ell ----> a
lla ----> .
sophia
... ----> s
..s ----> o
.so ----> p
sop ----> h
oph ----> i
phi ----> a
hia ----> .


In [26]:
print(f" samples of X: {X[:2]}")
print(f" samples of Y: {Y[:2]}")
X.shape, X.dtype, Y.shape, Y.dtype

 samples of X: tensor([[0, 0, 0],
        [0, 0, 5]])
 samples of Y: tensor([ 5, 13])


(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

## embedding into smaller dimension


In [17]:

C = torch.randn((27, 2)) # look up table
# embedding one interger
C[5]

tensor([-0.4507,  0.5883])

In [27]:
# using one-hot encoding

emb_five = F.one_hot(torch.tensor(5), num_classes=27)
emb_five.dtype # (torch.int64) which is a long
emb_five.float() @ C

tensor([-0.4507,  0.5883])

## conclusion

---

The result is the same as the just indexing because matrix multiplication


In [None]:
## torch indexing is very powerful. you can actually use lists or tensors as indices and it should get everything there. my God, R poisoned everything!!
print(C[X].shape)
emb = C[X]
emb

# this first list of list of list from the output represents from [0,0,0] for the first word: tensor([[[-0.2244, -2.2367],
                                                                                                #[-0.2244, -2.2367],
                                                                                                #[-0.2244, -2.2367]],

torch.Size([32, 3, 2])


tensor([[[-0.2244, -2.2367],
         [-0.2244, -2.2367],
         [-0.2244, -2.2367]],

        [[-0.2244, -2.2367],
         [-0.2244, -2.2367],
         [-0.4507,  0.5883]],

        [[-0.2244, -2.2367],
         [-0.4507,  0.5883],
         [ 0.8921,  0.6443]],

        [[-0.4507,  0.5883],
         [ 0.8921,  0.6443],
         [ 0.8921,  0.6443]],

        [[ 0.8921,  0.6443],
         [ 0.8921,  0.6443],
         [ 0.4130, -1.7827]],

        [[-0.2244, -2.2367],
         [-0.2244, -2.2367],
         [-0.2244, -2.2367]],

        [[-0.2244, -2.2367],
         [-0.2244, -2.2367],
         [-0.4456, -1.0036]],

        [[-0.2244, -2.2367],
         [-0.4456, -1.0036],
         [ 0.2209,  0.8026]],

        [[-0.4456, -1.0036],
         [ 0.2209,  0.8026],
         [ 1.8280, -0.3603]],

        [[ 0.2209,  0.8026],
         [ 1.8280, -0.3603],
         [ 0.6267, -0.1460]],

        [[ 1.8280, -0.3603],
         [ 0.6267, -0.1460],
         [ 1.8280, -0.3603]],

        [[ 0.6267, -0

In [39]:
print(X[0,0])
emb[0]

tensor(0)


tensor([[-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367]])

# The hidden layer

In [42]:
W1 = torch.randn(6,100) # 6 because we have 2-D embeddings for each char and there is 3 for each context. the 100 represent the number of neurons we have in the layer
b = torch.randn(100) # why not b = torch.randn(1, 100)?
b

tensor([ 0.5379,  0.1178, -0.3910,  1.0925,  1.7204, -0.5543,  0.4283,  0.1344,
         1.2667,  0.4174, -1.1313,  1.3653,  0.0847, -0.1801,  0.4214,  2.0202,
        -0.1225,  0.2508,  0.8966,  0.4803,  0.3166,  1.1976, -0.2046,  0.8643,
        -0.2389, -0.9995,  0.9044,  0.0677,  0.2839,  0.0499,  1.0757,  0.2065,
        -2.5141,  0.1581,  0.1207, -0.8716, -0.4637,  0.2155,  0.2319, -0.2770,
         0.1123, -0.0778,  0.8147, -1.5275,  1.2225,  0.3626,  0.7838, -0.5317,
         1.2423, -0.3128,  0.1466, -0.2406,  0.3843, -2.9398, -0.5915, -0.0422,
         1.2915,  1.2169, -0.8571, -1.8310, -1.2037, -0.3812,  1.3656,  0.8972,
         0.2441, -0.4856,  0.4529,  0.8289,  0.7229,  0.6773, -0.7388,  1.2543,
         0.4733,  1.0236, -0.0628, -1.5502,  0.6762,  0.6232, -2.0483, -1.5754,
        -0.2245, -0.2473, -0.5865,  0.2130, -1.8506, -1.8898, -1.0634, -1.1460,
        -0.6928,  0.3696,  0.6713, -0.0932, -0.8044, -0.8232, -1.6594, -0.8658,
         0.1671,  0.7748,  0.5460, -0.75

In [47]:
# how are we going to do the matrix multiplication between a (32,3,2) and (3,2) tensor?

# first option, use concat to concat the input
print(emb[:,0,:])

emb[:,0,:].shape

tensor([[-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.4507,  0.5883],
        [ 0.8921,  0.6443],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.4456, -1.0036],
        [ 0.2209,  0.8026],
        [ 1.8280, -0.3603],
        [ 0.6267, -0.1460],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [ 0.4130, -1.7827],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [ 1.8280, -0.3603],
        [ 0.5337, -0.5609],
        [ 0.4130, -1.7827],
        [-0.1140, -1.3774],
        [-0.4507,  0.5883],
        [ 0.2209,  0.8026],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [-0.2244, -2.2367],
        [ 0.5337, -0.5609],
        [-0.4456, -1.0036],
        [ 0.9431, -0.5273],
        [ 0.5975,  1.1457]])


torch.Size([32, 2])