In [12]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm 
import random
%matplotlib inline

In [13]:
#Define word dataset and put it into dictionary
word_set = open('names.txt', 'r').read().splitlines()
print(word_set[:10]) #print 10 words
print(len(word_set))

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia', 'harper', 'evelyn']
32033


In [25]:
#Build vocabulary dictionary (Integer/Character Mappings)
chars = sorted(list(set(''.join(word_set))))
S_to_I = {s: i+1 for i,s in enumerate(chars)} #give each char mapping of i + 1
S_to_I['.'] = 0 #map end of sequence token
I_to_S = {i: s for s,i in S_to_I.items()} #reverse S to I
vocab_size = len(I_to_S)
print(vocab_size)

27


In [26]:
#Build dataset with Context : How many characters needed to predict the next character?
block_size = 3

def build_dataset(words):
    X, Y = [],[]
    for name in tqdm(words, desc = "Building dataset"):
        context = [0] * block_size 
        #Loop through chars, add . to symbolize end token
        for ch in name + '.':
            index = S_to_I[ch]
            X.append(context) #input context
            Y.append(index) #output integer integer to next character
            context = context[1:] + [index] #append context
    X = torch.tensor(X, dtype=torch.long)
    Y = torch.tensor(Y, dtype=torch.long)
    print(f"Shapes X: {X.shape}, Y: {Y.shape}")
    return X,Y


#Split up data and build datasets for Training, Validation, and Testing
random.seed(29)
random.shuffle(word_set)
idx1 = int(0.8 * len(word_set))
idx2 = int(0.9 * len(word_set))

train_x, train_y = build_dataset(word_set[:idx1]) #First 80 percent for Training
dev_x, dev_y = build_dataset(word_set[idx1:idx2]) #
test_x, test_y = build_dataset(word_set[idx2:]) #10 percent to test



Building dataset: 100%|███████████████████████████████████████| 25626/25626 [00:00<00:00, 157468.90it/s]


Shapes X: torch.Size([182496, 3]), Y: torch.Size([182496])


Building dataset: 100%|█████████████████████████████████████████| 3203/3203 [00:00<00:00, 551085.23it/s]


Shapes X: torch.Size([22913, 3]), Y: torch.Size([22913])


Building dataset: 100%|█████████████████████████████████████████| 3204/3204 [00:00<00:00, 505512.72it/s]

Shapes X: torch.Size([22737, 3]), Y: torch.Size([22737])





In [35]:
#MLP: Embedding Layer, Hidden Layer
n_embd = 16 #embedding table vectors
n_hidden = 200 #hidden layer nuerons
g = torch.Generator().manual_seed(28482948204) 


#Parameters 
#Embedding Lookup Table C
C = torch.randn((vocab_size, n_embd), generator=g) #each row is characters 10 dim vector
W1 = torch.randn((n_embd * block_size, n_hidden), generator=g) * (5/3) / ((n_embd * block_size) ** 0.5) #concatenated embeddings into hidden
W2 = torch.randn((n_hidden, vocab_size), generator=g) * 0.01 #hidden layer to output logits for each character
b2 = torch.zeros((1, vocab_size), generator=g) #one bias per character


#Paramters for Batch Norm to normalize inputs
bnorm_gain = torch.ones((1, n_hidden)) #learnable scale factor gamma
bnorm_bias = torch.zeros((1, n_hidden)) #learned shift factor beta
running_mean = torch.zeros((1, n_hidden)) #running mean
running_std = torch.zeros((1, n_hidden)) #running standard deviation

network_params = [C, W1, W2, b2, bnorm_gain, bnorm_bias]
print(f"Number of Parameters in model {sum(p.nelement() for p in network_params)}")
#ensure that gradients are calculated for back prop
for param in network_params:
    param.requires_grad = True



TypeError: zeros() received an invalid combination of arguments - got (tuple, generator=torch._C.Generator), but expected one of:
 * (tuple of ints size, *, tuple of names names, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


tensor([20,  1, 21,  ..., 14,  1,  0])