In [None]:
from __future__ import print_function
from keras.models import Sequential, Model
from keras.layers import Dense, Activation
from keras.layers import Input, LSTM, RepeatVector
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
from utils import read_txt_file, add_space
from copy import deepcopy
import numpy as np
import random
import sys

RANDOM_SEED = 1918
np.random.seed(RANDOM_SEED)
# random.shuffle() below draws from the stdlib generator, which
# np.random.seed() does not touch; seed it too so the input/target
# pairing is the same on every Restart & Run All.
random.seed(RANDOM_SEED)

# load name file
all_names = read_txt_file("all_together/all_new.txt")
data_size = len(all_names)

# Build the character vocabulary from every character that occurs in the
# names, plus the ' ' separator introduced by the join.
chars = sorted(set(' '.join(all_names)))
chars_len = len(chars)
print('total chars:', chars_len)

# lookup tables: character -> one-hot column, and back
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))


# make all name strings up to a max length
# NOTE(review): add_space is assumed to return a string of at most `maxlen`
# characters; a longer result would overflow the time axis below -- confirm.
maxlen = 22
for i, name in enumerate(all_names):
    all_names[i] = add_space(name, maxlen)


# randomly pair targets to inputs
targets = deepcopy(all_names)
random.shuffle(targets)

print('Vectorization...')
# One-hot tensors of shape (number of names, maxlen, vocabulary size).
# The builtin `bool` replaces the deprecated `np.bool` alias.
X = np.zeros((len(all_names), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(all_names), maxlen, len(chars)), dtype=bool)

for i, name_string in enumerate(all_names):
    for t, char in enumerate(name_string):
        X[i, t, char_indices[char]] = 1

for i, name_string in enumerate(targets):
    for t, char in enumerate(name_string):
        y[i, t, char_indices[char]] = 1
print('Success!')

# %% Models section
# input placeholder
# NOTE(review): `input_strings` is not connected to any model in this cell;
# it is kept only so the notebook namespace is unchanged.
input_strings = Input(shape=(maxlen, chars_len))

# encode the representation of the input: the encoder LSTM consumes the whole
# one-hot sequence and returns a single 64-dimensional vector
model_input = Input(shape=(maxlen, chars_len))
encoded = LSTM(64)(model_input)

# reconstruction of the input: repeat the encoding once per timestep and let
# the decoder LSTM unroll it back into a (maxlen, chars_len) sequence.
# The decoder must consume `decoded` (the repeated encoding), not
# `model_input`; feeding it `model_input` leaves the encoder and RepeatVector
# layers out of the graph entirely.
decoded = RepeatVector(maxlen)(encoded)
decoded = LSTM(chars_len, return_sequences=True)(decoded)

# the model
seq_autoencoder = Model(model_input, decoded)
# encoder = Model(model_input, encoded)
# NOTE(review): the decoder LSTM output is passed straight to
# binary_crossentropy. If the LSTM uses its default output activation the
# values are not probabilities; a per-timestep softmax head with
# categorical_crossentropy may be a better fit -- left unchanged, confirm.
seq_autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

# show model summary
seq_autoencoder.summary()


Using TensorFlow backend.


total chars: 27
Vectorization...
Success!
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 22, 27)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 22, 27)            5940      
Total params: 5,940
Trainable params: 5,940
Non-trainable params: 0
_________________________________________________________________


In [None]:
# fit model and print iterations
iterations = 100

# Fixed probe names, decoded after every training round to eyeball progress.
test_names = ['ayamatanga',
              'omoluorogbo',
              'lindsay',
              'pitcher',
              'reggie',
              'yokohama']


def _one_hot_names(names):
    """One-hot encode a sequence of name strings.

    Returns a boolean array of shape (len(names), maxlen, len(chars)) where
    entry [i, t, k] is set when character t of names[i] is chars[k].
    Positions past the end of a name are left all-zero.

    Raises KeyError for a character missing from `char_indices`, and
    IndexError for a name longer than `maxlen`.
    """
    encoded = np.zeros((len(names), maxlen, len(chars)), dtype=bool)
    for i, name_string in enumerate(names):
        for t, char in enumerate(name_string):
            encoded[i, t, char_indices[char]] = 1
    return encoded


# The probe set never changes, so encode it once instead of on every
# iteration. The names go through add_space first, exactly like the training
# names, so the network sees the same padding at inference as in training.
# NOTE(review): assumes add_space pads to `maxlen` the same way it did for
# `all_names` -- confirm against utils.add_space.
X_test = _one_hot_names([add_space(name, maxlen) for name in test_names])

# `iteration` rather than `iter`, to avoid shadowing the builtin
for iteration in range(iterations):
    print('Iteration:', iteration)
    seq_autoencoder.fit(X, y,
                        epochs=10,
                        batch_size=128,
                        shuffle=True)

    preds = seq_autoencoder.predict(X_test)

    print('results:')
    for given_name, prediction in zip(test_names, preds):
        # most likely character at every timestep, mapped back to text
        pred_char_index = np.argmax(prediction, 1)
        pred_name = ''.join(indices_char[char_id]
                            for char_id in pred_char_index)

        print('Given name:\t', given_name, '\t\tNew name: \t', pred_name)

    # shuffle targets for next iteration
    random.shuffle(targets)
    y = _one_hot_names(targets)


Iteration: 0
Epoch 1/2
Epoch 2/2
results:
Given name:	 ayamatanga 		New name: 	 irr                   
Given name:	 omoluorogbo 		New name: 	 asaa                  
Given name:	 lindsay 		New name: 	 pae                   
Given name:	 pitcher 		New name: 	 aaas                  
Given name:	 reggie 		New name: 	 n                     
Given name:	 yokohama 		New name: 	 paea                  
Iteration: 1
Epoch 1/2
Epoch 2/2
results:
Given name:	 ayamatanga 		New name: 	 area                  
Given name:	 omoluorogbo 		New name: 	 aaae                  
Given name:	 lindsay 		New name: 	 vae                   
Given name:	 pitcher 		New name: 	 aaaa                  
Given name:	 reggie 		New name: 	 raee                  
Given name:	 yokohama 		New name: 	 paea                  
Iteration: 2
Epoch 1/2

In [None]:
# Persist the trained autoencoder under the name 'checkpoint_save'
# (relative to the notebook's working directory).
seq_autoencoder.save(filepath='checkpoint_save')