While the original exercise constructs a dino-name generator from the NumPy-based methods built in the previous exercise, I will try to use Keras instead.

In [1]:
import numpy as np
import keras
from keras.models import Model, load_model
from keras.layers import Input, SimpleRNN, Dense, Lambda, LSTM, Masking, TimeDistributed
from keras.callbacks import ReduceLROnPlateau

Using TensorFlow backend.


How many characters are in our data?

In [2]:
# Read the whole corpus in lower case. The context manager closes the file
# handle deterministically instead of leaving it open until garbage collection.
with open('dinos.txt', 'r') as corpus_file:
    data = corpus_file.read().lower()

# The distinct characters of the corpus form the vocabulary (unordered here).
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))

There are 19909 total characters and 27 unique characters in your data.


In [3]:
# Number the vocabulary in sorted order (index -> character), then invert
# that table to obtain the character -> index lookup.
ix_to_char = dict(enumerate(sorted(chars)))
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}
print(ix_to_char)

{0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


Let's create our training set:

In [4]:
# Re-read the corpus, this time one name per line, lower-cased and with
# surrounding whitespace (including the trailing newline) removed
with open("dinos.txt") as names_file:
    examples = [line.lower().strip() for line in names_file]
# Seed NumPy's global RNG so the in-place shuffle is repeatable
np.random.seed(210)
np.random.shuffle(examples)

In [5]:
# Length of the longest name in the data
maxLen = max(len(name) for name in examples)
print(maxLen)

26


In [6]:
m = len(examples)
feature_size = 1  # each time step stores the raw character index, not a one-hot vector
n_a = 50  # latent space dimensions

# Inputs and targets share one zero-initialised layout: (example, time step, feature)
X = np.zeros((m, maxLen, feature_size))
Y = np.zeros((m, maxLen, feature_size))

for row, name in enumerate(examples):
    indices = [char_to_ix[ch] for ch in name]
    steps = len(indices)
    X[row, :steps, :] = np.array(indices).reshape(-1, 1)
    # The target is the input shifted one step to the left, closed by the newline index
    shifted = indices[1:] + [char_to_ix["\n"]]
    Y[row, :steps, :] = np.array(shifted, dtype=float).reshape(-1, 1)

In [7]:
# create the model
def simple_model(input_shape, n_a):
    '''
    Assemble the (uncompiled) character-level SimpleRNN network.

    Arguments:
    input_shape -- shape of a single example, handed to `Input` unchanged
    n_a -- number of units of the SimpleRNN layer

    Returns:
    A Keras `Model`: Masking(mask_value=0) -> SimpleRNN returning the full
    sequence -> Dense softmax with `vocab_size` outputs.
    '''
    chars_in = Input(shape=input_shape, name='x')
    masked = Masking(mask_value=0)(chars_in)
    hidden_seq = SimpleRNN(n_a, return_sequences=True, name='rnn_cell')(masked)
    char_probs = Dense(vocab_size, activation='softmax', name='dense_output')(hidden_seq)
    return Model(inputs=chars_in, outputs=char_probs)

try:
    # Reuse the model saved by a previous run, if there is one.
    model = load_model('simple_rnn.h5')
except Exception:  # deliberately not a bare except: Ctrl-C / SystemExit must propagate
    # No usable saved model: build and train from scratch.
    # simple_model() requires the hidden size; omitting it raised a TypeError here.
    model = simple_model(input_shape=(maxLen, feature_size), n_a=n_a)
    # Use sparse loss due to not using 1-hot-encoding
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['sparse_categorical_accuracy'])
    # Optional learning-rate schedule (disabled):
    # learning_rate_reduction = ReduceLROnPlateau(monitor='sparse_categorical_accuracy', patience=4, verbose=1, factor=0.5, min_lr=0.00001)
    model.fit(X, Y, epochs=200, batch_size=128)  # , callbacks=[learning_rate_reduction]
    model.save('simple_rnn.h5')

We got 45% accuracy after 100-200 epochs.

Now let's build an "inference" generator:

In [8]:
def get_inference_model(model):
    '''
    Build a one-step-at-a-time model that reuses the trained SimpleRNN weights.

    Arguments:
    model -- trained model containing layers named 'rnn_cell' and 'dense_output'

    Returns:
    A Keras `Model` with inputs [char, hidden state] and outputs
    [softmax over the vocabulary, updated hidden state].
    '''
    rnn = model.get_layer(name='rnn_cell')
    dense = model.get_layer(name='dense_output')  # use the already trained dense layer
    # Take the width from the trained layer instead of the global `n_a`: that
    # global is rebound later in the notebook, and a mismatch would make
    # set_weights() below fail.
    units = rnn.units

    inp = Input(shape=(1, feature_size))  # regular input - char
    state_h = Input(shape=(units,))  # state input, one value per recurrent unit

    inf_rnn = SimpleRNN(units, return_sequences=True, return_state=True)
    seq, h = inf_rnn(inp, initial_state=[state_h])
    probs = dense(seq)
    model_infer = Model(inputs=[inp, state_h], outputs=[probs, h])
    # The call above created the layer's weights, so they can now be overwritten.
    inf_rnn.set_weights(rnn.get_weights())  # load the weights of the trained rnn
    return model_infer

In [9]:
def generate_name(inf_model, seq_len=20):
    '''
    Sample a name character by character from the single-step SimpleRNN model.

    Arguments:
    inf_model -- model taking [char, hidden state] and returning
                 [softmax over the vocabulary, new hidden state]
    seq_len -- maximum number of characters to emit

    Returns:
    The generated name as a string (at most `seq_len` characters).
    '''
    # Random first letter; index 0 (the newline / end marker) is excluded.
    x = np.random.randint(1, vocab_size, size=(1, 1, 1))
    name = []
    # Size the initial state from the model's own state input rather than the
    # global `n_a`, which is rebound later in the notebook.
    state_h = np.zeros((1, inf_model.input_shape[1][-1]))
    for _ in range(seq_len):
        name.append(ix_to_char[x[0][0][0]])
        prob, state_h = inf_model.predict([x, state_h])  # get the softmax output, handle as a distribution
        pred = np.random.choice(np.arange(vocab_size), p=prob.ravel())  # sample from the distribution
        if pred == 0:  # if we reach a new line, we break
            break
        x = pred.reshape(1, 1, 1)

    return "".join(name)

In [10]:
inf_model = get_inference_model(model)

In [11]:
generate_name(inf_model, maxLen)

'rueinpigus'

Not bad. Let's try LSTM:

In [12]:
# create the model
n_a = 128  # LSTM width; lstm_model reads this global when it is called
def lstm_model(input_shape):
    '''
    Assemble the (uncompiled) character-level LSTM network.

    Arguments:
    input_shape -- shape of a single example, handed to `Input` unchanged

    Returns:
    A Keras `Model`: Masking(mask_value=0) -> LSTM with `n_a` units returning
    the full sequence -> Dense softmax with `vocab_size` outputs.
    '''
    chars_in = Input(shape=input_shape)
    masked = Masking(mask_value=0)(chars_in)
    hidden_seq = LSTM(n_a, return_sequences=True, name='lstm_cell')(masked)
    char_probs = Dense(vocab_size, activation='softmax', name='dense_output_lstm')(hidden_seq)
    return Model(inputs=chars_in, outputs=char_probs)

In [13]:
try:
    # Reuse the model saved by a previous run, if there is one.
    lstm = load_model('lstm.h5')
except Exception:  # deliberately not a bare except: Ctrl-C / SystemExit must propagate
    # No usable saved model: build and train from scratch.
    lstm = lstm_model(input_shape=(maxLen, feature_size))
    # Sparse loss/metric because the targets are class indices, not one-hot vectors
    lstm.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['sparse_categorical_accuracy'])
    lstm.fit(X, Y, epochs=200, batch_size=128)
    lstm.save('lstm.h5')

In [14]:
def get_inference_model_lstm(model):
    '''
    Build a one-step-at-a-time model that reuses the trained LSTM weights.

    Arguments:
    model -- trained model containing layers named 'lstm_cell' and 'dense_output_lstm'

    Returns:
    A Keras `Model` with inputs [char, hidden state, cell state] and outputs
    [softmax over the vocabulary, updated hidden state, updated cell state].
    '''
    trained_lstm = model.get_layer(name='lstm_cell')
    dense = model.get_layer(name='dense_output_lstm')  # use the already trained dense layer
    # Take the width from the trained layer instead of the global `n_a`, so the
    # copy below cannot fail on a shape mismatch if that global has changed.
    units = trained_lstm.units

    inp = Input(shape=(1, feature_size))  # regular input - char
    state_h = Input(shape=(units,))  # state input, one value per LSTM unit
    state_c = Input(shape=(units,))  # memory input, one value per LSTM unit

    inf_lstm = LSTM(units, return_sequences=True, return_state=True)
    seq, h, c = inf_lstm(inp, initial_state=[state_h, state_c])
    probs = dense(seq)
    model_infer = Model(inputs=[inp, state_h, state_c], outputs=[probs, h, c])
    # The call above created the layer's weights, so they can now be overwritten.
    inf_lstm.set_weights(trained_lstm.get_weights())  # load the weights of the trained lstm
    return model_infer

In [15]:
def generate_name_lstm(inf_model, seq_len=20):
    '''
    Sample a name character by character from the single-step LSTM model.

    Arguments:
    inf_model -- model taking [char, hidden state, cell state] and returning
                 [softmax over the vocabulary, new hidden state, new cell state]
    seq_len -- maximum number of characters to emit

    Returns:
    The generated name as a string (at most `seq_len` characters).
    '''
    # Random first letter; index 0 (the newline / end marker) is excluded.
    x = np.random.randint(1, vocab_size, size=(1, 1, 1))
    name = []
    # Size the initial states from the model's own state inputs rather than
    # the global `n_a`, so they always match the model that was passed in.
    h_shape, c_shape = inf_model.input_shape[1:3]
    state_h = np.zeros((1, h_shape[-1]))
    state_c = np.zeros((1, c_shape[-1]))
    for _ in range(seq_len):
        name.append(ix_to_char[x[0][0][0]])
        prob, state_h, state_c = inf_model.predict([x, state_h, state_c])  # get the softmax output, handle as a distribution
        pred = np.random.choice(np.arange(vocab_size), p=prob.ravel())  # sample from the distribution
        if pred == 0:  # if we reach a new line, we break
            break
        x = pred.reshape(1, 1, 1)

    return "".join(name)

In [16]:
inf_model = get_inference_model_lstm(lstm)

In [17]:
generate_name_lstm(inf_model, maxLen)

'xleosaurus'

Let's try an even more complex model: three stacked LSTM layers.

In [18]:
def complex_model(input_shape, n_a = 128):
    '''
    Assemble the (uncompiled) network of three stacked LSTM layers.

    Arguments:
    input_shape -- shape of a single example, handed to `Input` unchanged
    n_a -- units of the first LSTM; the second uses n_a//2 and the third n_a//3

    Returns:
    A Keras `Model`: Masking(mask_value=0) -> lstm_1 -> lstm_2 -> lstm_3
    (all returning full sequences) -> Dense softmax with `vocab_size` outputs.
    '''
    stack = (('lstm_1', n_a), ('lstm_2', n_a//2), ('lstm_3', n_a//3))

    chars_in = Input(shape=input_shape, name='x')
    seq = Masking(mask_value=0)(chars_in)
    for layer_name, units in stack:
        seq = LSTM(units, return_sequences=True, name=layer_name)(seq)
    char_probs = Dense(vocab_size, activation='softmax', name='dense_output_lstm')(seq)
    return Model(inputs=chars_in, outputs=char_probs)

In [19]:
try:
    # Reuse the model saved by a previous run, if there is one.
    complx = load_model('complex.h5')
except Exception:  # deliberately not a bare except: Ctrl-C / SystemExit must propagate
    # No usable saved model: build and train from scratch.
    complx = complex_model(input_shape=(maxLen, feature_size))
    # Sparse loss/metric because the targets are class indices, not one-hot vectors
    complx.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['sparse_categorical_accuracy'])
    complx.fit(X, Y, epochs=200, batch_size=256)
    complx.save('complex.h5')

In [20]:
def get_inference_model_complex(model):
    '''
    Build a one-step-at-a-time model that reuses the three trained LSTM layers.

    Arguments:
    model -- trained model containing layers named 'lstm_1', 'lstm_2',
             'lstm_3' and 'dense_output_lstm'

    Returns:
    A Keras `Model` with inputs [char, h1, c1, h2, c2, h3, c3] and outputs
    [softmax over the vocabulary, h1, c1, h2, c2, h3, c3], where h*/c* are the
    hidden and cell states of the respective LSTM layer.
    '''
    dense = model.get_layer(name='dense_output_lstm')  # use the already trained dense layer

    inp = Input(shape=(1, feature_size))  # regular input - char
    seq = inp
    state_inputs, state_outputs = [], []
    for layer_name in ('lstm_1', 'lstm_2', 'lstm_3'):
        trained = model.get_layer(name=layer_name)
        # Read the width from the trained layer rather than recomputing it from
        # the global `n_a`: complex_model() takes its own `n_a`, so the two can differ.
        units = trained.units
        state_h = Input(shape=(units,))  # state input for this layer
        state_c = Input(shape=(units,))  # memory input for this layer

        inf_lstm = LSTM(units, return_sequences=True, return_state=True)
        seq, h, c = inf_lstm(seq, initial_state=[state_h, state_c])
        # The call above created the layer's weights, so they can now be overwritten.
        inf_lstm.set_weights(trained.get_weights())  # load the weights of the trained lstm

        state_inputs += [state_h, state_c]
        state_outputs += [h, c]

    probs = dense(seq)
    model_infer = Model(inputs=[inp] + state_inputs,
                        outputs=[probs] + state_outputs)
    return model_infer

In [21]:
def generate_name_complex(inf_model, seq_len=20):
    '''
    Sample a name character by character from the single-step stacked-LSTM model.

    Arguments:
    inf_model -- model taking [char, *states] and returning
                 [softmax over the vocabulary, *new states], with the states
                 in the same order on both sides
    seq_len -- maximum number of characters to emit

    Returns:
    The generated name as a string (at most `seq_len` characters).
    '''
    # Random first letter; index 0 (the newline / end marker) is excluded.
    x = np.random.randint(1, vocab_size, size=(1, 1, 1))
    name = []
    # One zero vector per state input, sized from the model itself rather than
    # from the global `n_a`, so the shapes always match the model passed in.
    states = [np.zeros((1, shape[-1])) for shape in inf_model.input_shape[1:]]

    for _ in range(seq_len):
        name.append(ix_to_char[x[0][0][0]])
        outputs = inf_model.predict([x] + states)
        # First output is the softmax (handled as a distribution); the rest are the updated states.
        prob, states = outputs[0], list(outputs[1:])
        pred = np.random.choice(np.arange(vocab_size), p=prob.ravel())  # sample from the distribution
        if pred == 0:  # if we reach a new line, we break
            break
        x = pred.reshape(1, 1, 1)

    return "".join(name)

In [22]:
cmplx_inf_model = get_inference_model_complex(complx)

In [25]:
generate_name_complex(cmplx_inf_model, maxLen)

'zetoaensaurus'

In [None]:
def sample(preds, temperature=1.0):
    '''
    Takes softmax (multinomial) probabilities and converts them either to less diversity or more,
    then samples from this distribution and returns the sampled number

    Arguments:
    preds -- 1-D sequence of probabilities
    temperature -- positive scaling factor; values below 1 sharpen the
                   distribution, values above 1 flatten it

    Returns:
    Index of the sampled entry.

    Raises:
    ValueError -- if `temperature` is not strictly positive
    '''
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    preds = np.asarray(preds).astype('float64')
    # log(0) = -inf is intended (a zero probability stays zero after exp);
    # only the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum so the largest term is exp(0) = 1. Without this,
    # a low temperature makes every term underflow to 0 and the normalisation
    # below yields NaN.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)