In [15]:
from net import MLP, grad_descent, parse_txt, char_tokenize, itos, stoi
from net.util import tanh, dtanh, cross_entropy, SEED

import numpy as np
import math

In [16]:
# Hyper Parameters
# Every tunable value for the notebook lives in this cell.
BLOCK_SIZE = 4       # context length: passed to char_tokenize and used to size the generation context
FEATURES = 64        # number of columns in the embedding matrix
VOCAB_SIZE = 27      # number of token ids sampled over during generation
# Layer widths handed to MLP: the first entry is BLOCK_SIZE * FEATURES,
# the last is VOCAB_SIZE, with a single 256-wide layer between them.
size = (
    BLOCK_SIZE * FEATURES,
    256,
    VOCAB_SIZE,
)
# The next four values are forwarded positionally to grad_descent, in this order
# after STEPS: BATCH_SIZE, LR, ALPHA.
# NOTE(review): LR is presumably the learning rate and ALPHA a regularisation
# strength -- confirm against grad_descent's signature.
LR = 0.1
ALPHA = 0.0001
STEPS = 200_000
BATCH_SIZE = 30
TEMPERATURE = 0.8    # passed as `temperature=` to name_net.forward when generating

In [17]:
# Name data
# Read the raw names and turn them into model inputs (xs), targets (ys) and the
# vocabulary; BLOCK_SIZE is forwarded to the tokenizer.
names = parse_txt("../data/names.txt")
xs, ys, vocab = char_tokenize(names, BLOCK_SIZE)
# Lookup tables in both directions, both built from the same vocab.
str_to_int, int_to_str = stoi(vocab), itos(vocab)


# Splits
# Cut points at 80% and 90% of the examples: the first 80% is train, the next
# 10% is test, and the remaining tail is dev.
# NOTE(review): the split is purely positional -- no shuffle happens in this
# cell; confirm whether char_tokenize already randomises the example order.
b1, b2 = (math.floor(len(xs) * fraction) for fraction in (0.8, 0.9))
x_train, y_train = xs[:b1], ys[:b1]
x_test, y_test = xs[b1:b2], ys[b1:b2]
x_dev, y_dev = xs[b2:], ys[b2:]

In [18]:
# Initialize emb matrix
# One row per vocabulary entry, FEATURES columns, drawn from a standard normal.
# NOTE(review): no RNG seed is set in the visible cells, so this draw (and
# everything downstream of it) differs from run to run.
emb = np.random.standard_normal(size=(len(vocab), FEATURES))

# Initialize model
# MLP receives the layer sizes, the activation with its derivative, and the
# embedding matrix, in that order.
name_net = MLP(
    size,
    tanh,
    dtanh,
    emb,
)

In [19]:
# Train
# Fit name_net on the training split using the values from the hyper-parameter cell.
grad_descent(
    name_net,
    x_train,
    y_train,
    STEPS,
    BATCH_SIZE,
    LR,
    ALPHA,
)

# Hyperparam Training
# NOTE(review): the commented-out call below is stale -- ITERS and EPOCHS are
# not defined in any visible cell, and the live call above passes STEPS instead.
# grad_descent(name_net, x_dev, y_dev, ITERS, EPOCHS, BATCH_SIZE, LR, ALPHA)

step 0 | loss: 0.1278
step 1000 | loss: 0.0972
step 2000 | loss: 0.0834
step 3000 | loss: 0.0820
step 4000 | loss: 0.0791
step 5000 | loss: 0.0912
step 6000 | loss: 0.0690
step 7000 | loss: 0.0698
step 8000 | loss: 0.0836
step 9000 | loss: 0.0677
step 10000 | loss: 0.0751
step 11000 | loss: 0.1011
step 12000 | loss: 0.0713
step 13000 | loss: 0.0718
step 14000 | loss: 0.0707
step 15000 | loss: 0.0732
step 16000 | loss: 0.0856
step 17000 | loss: 0.0677
step 18000 | loss: 0.0660
step 19000 | loss: 0.0912
step 20000 | loss: 0.0959
step 21000 | loss: 0.0699
step 22000 | loss: 0.0642
step 23000 | loss: 0.0755
step 24000 | loss: 0.0786
step 25000 | loss: 0.0951
step 26000 | loss: 0.0747
step 27000 | loss: 0.0754
step 28000 | loss: 0.0784
step 29000 | loss: 0.0788
step 30000 | loss: 0.0847
step 31000 | loss: 0.0789
step 32000 | loss: 0.0627
step 33000 | loss: 0.0940
step 34000 | loss: 0.0733
step 35000 | loss: 0.0856
step 36000 | loss: 0.0783
step 37000 | loss: 0.0660
step 38000 | loss: 0.0817

In [20]:
# Check entropy
# Run one forward pass over the entire training split, then score the result
# with cross_entropy against y_train.
name_net.forward(x_train)
# Keep only the largest entry in each row of the final layer's value.
# NOTE(review): this reduction discards which class each probability belongs
# to, so the score below may not be the usual cross-entropy of the true labels.
# Confirm what cross_entropy expects as its first argument before trusting
# this number or comparing it with the loss printed during training.
preds = np.max(name_net.layers[-1].value, axis=1)
# Last expression of the cell: displayed by the notebook as a plain Python float.
float(cross_entropy(preds, y_train))

10.106314837614564

In [23]:
# Generate
# Sample 20 names, one token at a time. Each name starts from a context made of
# BLOCK_SIZE '.' tokens and is finished as soon as a '.' shows up in the output.
for _ in range(20):
    # Called `context` rather than `input`: assigning to `input` at cell level
    # would replace the built-in input() for the rest of the kernel session.
    context = [str_to_int['.']] * BLOCK_SIZE
    out = ""
    while '.' not in out:
        name_net.forward(np.array(context), temperature=TEMPERATURE)
        probs = name_net.layers[-1].value
        # np.random.choice needs a 1-D probability vector, hence the flatten.
        i = np.random.choice(VOCAB_SIZE, p=probs.flatten())
        out += int_to_str[i]
        # Slide the window: drop the oldest token and append the one just sampled.
        context = context[1:] + [i]
    # Strip the trailing character that ended the loop before printing.
    print(out[:-1])

nara
alette
kaine
zelei
carley
kristyn
alai
patrin
khyra
hamada
carly
daron
nalaya
evalin
aneima
milea
ilyn
kesha
anna
avalee
