In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

# Load the dataset: one name per line. The context manager closes the file
# handle as soon as the read finishes instead of leaving it to the garbage collector.
with open("names.txt", "r") as names_file:
    words = names_file.read().splitlines()

# Vocabulary: every distinct character in the data, sorted, with '.' placed at
# index 0 so it can serve as the start/end-of-name marker.
charlist = ['.'] + sorted(set("".join(words)))
numfromchar = {ch: k for k, ch in enumerate(charlist)}   # character -> integer id
charfromnum = {k: ch for k, ch in enumerate(charlist)}   # integer id -> character

#(Hyper)Parameters
nc      = len(charlist)          # vocabulary size (including '.')
nw      = len(words)             # number of names in the dataset
embdim  = 2                      # dimensions per character embedding
blocksz = 3                      # number of context characters fed to the model
firstsz  = embdim * blocksz      # width of the flattened context embedding
hiddensz = 100                   # number of hidden units


In [None]:
# Build the examples: for every position in every padded name, the previous
# `blocksz` characters form the input and the character at that position is the target.
X = []
Y = []
for w in words:
    # Pad with `blocksz` leading '.' so the first real character has a full context,
    # and append one '.' so the end of the name is also a prediction target.
    t = ('.' * blocksz) + w + '.'
    for k in range(blocksz, len(t)):
        # Deliberately not named `input`: at notebook top level that would overwrite
        # the builtin `input()` for the rest of the kernel session.
        context = t[k - blocksz : k]
        target  = t[k]
        X.append([numfromchar[ch] for ch in context])
        Y.append(numfromchar[target])
        #print(context + " -> " + target)

X = torch.tensor(X)   # one row of `blocksz` character ids per example
Y = torch.tensor(Y)   # one target character id per example
#print(X.shape, X.dtype, Y.shape, Y.dtype)

In [None]:
# Embedding table: one randomly initialised row of `embdim` values per character id.
C = torch.randn(nc, embdim)
# Look up all contexts at once; the result has X's shape plus a trailing `embdim` axis.
emb = C[X]

In [None]:
# Hidden layer: flatten each example's context embeddings into one row of
# `firstsz` features, map them to `hiddensz` units, and squash with tanh.
W1 = torch.randn(firstsz, hiddensz)
# One bias per hidden unit so it broadcasts over the rows of the matmul result
# (which has `hiddensz` columns).
b1 = torch.randn(hiddensz)
h = torch.tanh(emb.view(-1, firstsz) @ W1 + b1)

In [None]:
# Output layer: one logit per vocabulary entry for every example.
W2 = torch.randn(hiddensz, nc)
b2 = torch.randn(nc)
logits = h @ W2 + b2

# Softmax written out by hand: exponentiate, then normalise each row to sum to 1.
counts = logits.exp()
probs = counts / counts.sum(1, keepdim=True)

# Mean negative log-likelihood of the correct next character. The row index must
# span every example in Y (not a fixed 32), otherwise the two index tensors do
# not have matching shapes.
loss = -probs[torch.arange(Y.shape[0]), Y].log().mean()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

# Load the dataset (one name per line); the context manager closes the file promptly.
with open("names.txt", "r") as names_file:
    words = names_file.read().splitlines()

# Vocabulary with '.' at index 0 as the start/end-of-name marker.
charlist = ['.'] + sorted(set("".join(words)))
numfromchar = {ch: k for k, ch in enumerate(charlist)}   # character -> integer id
charfromnum = {k: ch for k, ch in enumerate(charlist)}   # integer id -> character

#(Hyper)Parameters
nc      = len(charlist)          # vocabulary size
nw      = len(words)             # number of names
embdim  = 2                      # dimensions per character embedding
blocksz = 3                      # context length in characters
firstsz  = embdim * blocksz      # width of the flattened context embedding
hiddensz = 100                   # hidden units
learning_rate = 0.1              # gradient-descent step size


### Preprocess the raw data
# Each example maps the previous `blocksz` characters to the next character.
X, Y = [], []
for w in words:
    # Leading '.' padding gives the first character a full context; the trailing
    # '.' makes the end of the name a prediction target.
    t = ('.' * blocksz) + w + '.'
    for k in range(blocksz, len(t)):
        # Not named `input`, which would overwrite the builtin in the notebook kernel.
        context = t[k - blocksz : k]
        target  = t[k]
        X.append([numfromchar[ch] for ch in context])
        Y.append(numfromchar[target])
X, Y = torch.tensor(X), torch.tensor(Y)


#Initialization
# A dedicated seeded generator makes the initial weights reproducible.
g  = torch.Generator().manual_seed(2147483647)
C  = torch.randn((nc, embdim), generator=g)            # embedding table
W1 = torch.randn((firstsz, hiddensz), generator=g)     # hidden-layer weights
b1 = torch.randn(hiddensz, generator=g)                # hidden-layer bias
W2 = torch.randn((hiddensz, nc), generator=g)          # output-layer weights
b2 = torch.randn(nc, generator=g)                      # output-layer bias
params = [C, W1, b1, W2, b2]
for p in params:
    p.requires_grad = True


# Ten steps of gradient descent on the whole dataset.
for _ in range(10):
    # Forward pass: embed -> flatten -> tanh hidden layer -> logits -> loss.
    emb = C[X]
    h = torch.tanh(emb.view(-1, firstsz) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Y)
    print(f"Loss:{loss.item()}")

    # Backward pass: clear stale gradients before accumulating new ones.
    for p in params:
        p.grad = None
    loss.backward()

    # Parameter update; no_grad keeps the in-place step out of the autograd graph.
    with torch.no_grad():
        for p in params:
            p -= learning_rate * p.grad

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

# Load the dataset (one name per line); the context manager closes the file promptly.
with open("names.txt", "r") as names_file:
    words = names_file.read().splitlines()

# Vocabulary with '.' at index 0 as the start/end-of-name marker.
charlist = ['.'] + sorted(set("".join(words)))
numfromchar = {ch: k for k, ch in enumerate(charlist)}   # character -> integer id
charfromnum = {k: ch for k, ch in enumerate(charlist)}   # integer id -> character

#(Hyper)Parameters
nc      = len(charlist)          # vocabulary size
nw      = len(words)             # number of names
embdim  = 2                      # dimensions per character embedding
blocksz = 3                      # context length in characters
firstsz  = embdim * blocksz      # width of the flattened context embedding
hiddensz = 100                   # hidden units
batchsz = 32                     # examples per minibatch
learning_rate = 0.1              # gradient-descent step size
epochs = 10                      # number of minibatch steps (not passes over the data)


### Preprocess the raw data
# Each example maps the previous `blocksz` characters to the next character.
X, Y = [], []
for w in words:
    # Leading '.' padding gives the first character a full context; the trailing
    # '.' makes the end of the name a prediction target.
    t = ('.' * blocksz) + w + '.'
    for k in range(blocksz, len(t)):
        # Not named `input`, which would overwrite the builtin in the notebook kernel.
        context = t[k - blocksz : k]
        target  = t[k]
        X.append([numfromchar[ch] for ch in context])
        Y.append(numfromchar[target])
X, Y = torch.tensor(X), torch.tensor(Y)


#Initialization
# A dedicated seeded generator makes both the initial weights and the minibatch
# sequence reproducible.
g  = torch.Generator().manual_seed(2147483647)
C  = torch.randn((nc, embdim), generator=g)            # embedding table
W1 = torch.randn((firstsz, hiddensz), generator=g)     # hidden-layer weights
b1 = torch.randn(hiddensz, generator=g)                # hidden-layer bias
W2 = torch.randn((hiddensz, nc), generator=g)          # output-layer weights
b2 = torch.randn(nc, generator=g)                      # output-layer bias
params = [C, W1, b1, W2, b2]
for p in params:
    p.requires_grad = True


for _ in range(epochs):
    # Draw a random minibatch (with replacement) from the seeded generator.
    batch_idx = torch.randint(0, X.shape[0], (batchsz,), generator=g)

    # Forward pass on the minibatch only.
    emb = C[X[batch_idx]]
    h = torch.tanh(emb.view(-1, firstsz) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Y[batch_idx])
    print(f"Loss:{loss.item()}")

    # Backward pass: clear stale gradients before accumulating new ones.
    for p in params:
        p.grad = None
    loss.backward()

    # Parameter update; no_grad keeps the in-place step out of the autograd graph.
    with torch.no_grad():
        for p in params:
            p -= learning_rate * p.grad

In [None]:
### Learning rate

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

# Load the dataset (one name per line); the context manager closes the file promptly.
with open("names.txt", "r") as names_file:
    words = names_file.read().splitlines()

# Vocabulary with '.' at index 0 as the start/end-of-name marker.
charlist = ['.'] + sorted(set("".join(words)))
numfromchar = {ch: k for k, ch in enumerate(charlist)}   # character -> integer id
charfromnum = {k: ch for k, ch in enumerate(charlist)}   # integer id -> character

#(Hyper)Parameters
nc      = len(charlist)          # vocabulary size
nw      = len(words)             # number of names
embdim  = 2                      # dimensions per character embedding
blocksz = 3                      # context length in characters
firstsz  = embdim * blocksz      # width of the flattened context embedding
hiddensz = 100                   # hidden units
batchsz = 32                     # examples per minibatch
learning_rates = torch.logspace(-3,0,50)   # 50 candidates, log-spaced from 1e-3 to 1
epochs = 2000                    # minibatch steps per candidate learning rate


### Preprocess the raw data
# Each example maps the previous `blocksz` characters to the next character.
X, Y = [], []
for w in words:
    # Leading '.' padding gives the first character a full context; the trailing
    # '.' makes the end of the name a prediction target.
    t = ('.' * blocksz) + w + '.'
    for k in range(blocksz, len(t)):
        # Not named `input`, which would overwrite the builtin in the notebook kernel.
        context = t[k - blocksz : k]
        target  = t[k]
        X.append([numfromchar[ch] for ch in context])
        Y.append(numfromchar[target])
X, Y = torch.tensor(X), torch.tensor(Y)


res = [1000.0] * len(learning_rates)   # placeholder losses, overwritten per candidate
for lridx in range(len(learning_rates)):
    learning_rate = learning_rates[lridx]

    #Initialization
    # Re-created for every candidate so each learning rate starts from the same
    # weights and sees the same minibatch sequence. Without this reset, each run
    # would continue from the model trained by the previous candidates and the
    # resulting curve would mix learning rate with accumulated training.
    g  = torch.Generator().manual_seed(2147483647)
    C  = torch.randn((nc, embdim), generator=g)            # embedding table
    W1 = torch.randn((firstsz, hiddensz), generator=g)     # hidden-layer weights
    b1 = torch.randn(hiddensz, generator=g)                # hidden-layer bias
    W2 = torch.randn((hiddensz, nc), generator=g)          # output-layer weights
    b2 = torch.randn(nc, generator=g)                      # output-layer bias
    params = [C, W1, b1, W2, b2]
    for p in params:
        p.requires_grad = True

    for _ in range(epochs):
        batch_idx = torch.randint(0, X.shape[0], (batchsz,), generator=g)
        emb = C[X[batch_idx]]
        h = torch.tanh(emb.view(-1, firstsz) @ W1 + b1)
        logits = h @ W2 + b2
        loss = F.cross_entropy(logits, Y[batch_idx])
        #print(f"Loss:{loss.item()}")
        for p in params:
            p.grad = None
        loss.backward()
        # no_grad keeps the in-place step out of the autograd graph.
        with torch.no_grad():
            for p in params:
                p -= learning_rate * p.grad

    # The recorded value is the loss of the final minibatch only, so it is noisy.
    print(f"Learningrate:{learning_rate} -> Loss:{loss.item()}")
    res[lridx] = loss.item()

# The candidates are log-spaced, so a log x-axis spreads them evenly.
plt.plot(learning_rates, res)
plt.xscale("log")
plt.xlabel("learning rate")
plt.ylabel("final minibatch loss")

In [None]:
### Train/Validation/Test Split

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

# Load the dataset (one name per line); the context manager closes the file promptly.
with open("names.txt", "r") as names_file:
    words = names_file.read().splitlines()

# Vocabulary with '.' at index 0 as the start/end-of-name marker.
charlist = ['.'] + sorted(set("".join(words)))
numfromchar = {ch: k for k, ch in enumerate(charlist)}   # character -> integer id
charfromnum = {k: ch for k, ch in enumerate(charlist)}   # integer id -> character

#(Hyper)Parameters
nc      = len(charlist)          # vocabulary size
nw      = len(words)             # number of names
embdim  = 2                      # dimensions per character embedding
blocksz = 3                      # context length in characters
firstsz  = embdim * blocksz      # width of the flattened context embedding
hiddensz = 100                   # hidden units
batchsz = 32                     # examples per minibatch
learning_rate = 0.1              # gradient-descent step size
epochs = 50000                   # number of minibatch steps (not passes over the data)
split_fractions = [0.8, 0.9, 1.0]   # cumulative boundaries: train | validation | test

### Preprocess the raw data
# Each example maps the previous `blocksz` characters to the next character.
X, Y = [], []
for w in words:
    # Leading '.' padding gives the first character a full context; the trailing
    # '.' makes the end of the name a prediction target.
    t = ('.' * blocksz) + w + '.'
    for k in range(blocksz, len(t)):
        # Not named `input`, which would overwrite the builtin in the notebook kernel.
        context = t[k - blocksz : k]
        target  = t[k]
        X.append([numfromchar[ch] for ch in context])
        Y.append(numfromchar[target])
X, Y = torch.tensor(X), torch.tensor(Y)


### Train/Validation/Test split of X and Y
import random
random.seed(42)   # seeds only the stdlib RNG; the split below uses its own generator

# The permutation must cover the rows of X/Y (one per example), not the number of
# names: each name contributes several examples, so permuting range(len(words))
# would silently discard most of the data. A dedicated seeded generator makes the
# split reproducible without disturbing the weight-initialisation stream `g`.
num_examples = X.shape[0]
idxs = torch.randperm(num_examples, generator=torch.Generator().manual_seed(42))
ntr = int(split_fractions[0] * num_examples)
nvd = int(split_fractions[1] * num_examples)
tridxs  = idxs[:ntr]
vdidxs  = idxs[ntr:nvd]
tstidxs = idxs[nvd:]

Xtr, Ytr = X[tridxs], Y[tridxs]
Xvd, Yvd = X[vdidxs], Y[vdidxs]
Xtst, Ytst = X[tstidxs], Y[tstidxs]   # held out; not evaluated in this cell


#Initialization
# A dedicated seeded generator makes both the initial weights and the minibatch
# sequence reproducible.
g  = torch.Generator().manual_seed(2147483647)
C  = torch.randn((nc, embdim), generator=g)            # embedding table
W1 = torch.randn((firstsz, hiddensz), generator=g)     # hidden-layer weights
b1 = torch.randn(hiddensz, generator=g)                # hidden-layer bias
W2 = torch.randn((hiddensz, nc), generator=g)          # output-layer weights
b2 = torch.randn(nc, generator=g)                      # output-layer bias
params = [C, W1, b1, W2, b2]
for p in params:
    p.requires_grad = True

###Training
for _ in range(epochs):
    # Minibatches are drawn from the training split only.
    batch_idx = torch.randint(0, Xtr.shape[0], (batchsz,), generator=g)
    emb = C[Xtr[batch_idx]]
    h = torch.tanh(emb.view(-1, firstsz) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytr[batch_idx])
    #print(f"Loss:{loss.item()}")
    for p in params:
        p.grad = None
    loss.backward()
    # no_grad keeps the in-place step out of the autograd graph.
    with torch.no_grad():
        for p in params:
            p -= learning_rate * p.grad
    #print(f"Training Loss:{loss.item()}")


###Evaluation
# Full-split losses; no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    for split_name, Xsplit, Ysplit in (("Training", Xtr, Ytr), ("Validation", Xvd, Yvd)):
        emb = C[Xsplit]
        h = torch.tanh(emb.view(-1, firstsz) @ W1 + b1)
        logits = h @ W2 + b2
        loss = F.cross_entropy(logits, Ysplit)
        print(f"{split_name} Loss:{loss.item()}")


In [None]:
###Visualize Embeddings
# Scatter the first two embedding coordinates of every character and label each
# point with its character. This shows the whole embedding only when embdim == 2.

emb_xy = C.detach()   # plain values, detached from the autograd graph
plt.figure(figsize=(12,12))
plt.scatter(emb_xy[:, 0], emb_xy[:, 1], s=200)
for k in range(emb_xy.shape[0]):
    plt.text(emb_xy[k, 0].item(), emb_xy[k, 1].item(), charfromnum[k],
             ha="center", va="center", color="white")
plt.xlabel("embedding dimension 0")
plt.ylabel("embedding dimension 1")
# The first positional argument of plt.grid is the on/off flag, not the `which`
# selector, so state explicitly that the grid is switched on.
plt.grid(True)

In [None]:
### Sample new names from the trained network

numout = 10   # how many names to generate
# Sampling needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    for _ in range(numout):
        # Start from an all-'.' context (id 0). Not named `input`, which would
        # overwrite the builtin in the notebook kernel.
        context = [0] * blocksz
        output = []
        while True:
            emb = C[context]
            h = torch.tanh(emb.view(-1, firstsz) @ W1 + b1)
            logits = h @ W2 + b2
            probs = F.softmax(logits, dim=1)
            # Draw the next character id from the predicted distribution.
            pred = torch.multinomial(probs, num_samples=1, generator=g).item()
            # Slide the context window forward by one character.
            context = context[1:] + [pred]
            if not pred:
                break   # id 0 is '.', the end-of-name marker
            output.append(pred)

        res = "".join(charfromnum[k] for k in output)
        print(res)
