While the original exercise builds the dino-name generator with the NumPy functions written in the previous exercise, I will try to use Keras instead.

In [43]:
import numpy as np
import keras
from keras.models import Model
from keras.layers import Input, SimpleRNN, Dense, Lambda, LSTM, Masking, TimeDistributed
from keras import backend as K
import tensorflow as tf

How many characters are in our data?

In [44]:
# Read the whole corpus in one go. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open('dinos.txt', 'r') as corpus_file:
    data = corpus_file.read()
data = data.lower()
# Unique characters of the corpus; a set has no defined order.
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))

There are 19909 total characters and 27 unique characters in your data.


In [45]:
# Index -> character table over the sorted vocabulary, then its inverse.
ix_to_char = dict(enumerate(sorted(chars)))
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}
print(ix_to_char)

{0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


Let's create our training set:

In [46]:
# Read the file a second time, now as one lower-cased, stripped name per line.
with open("dinos.txt") as f:
    examples = [line.lower().strip() for line in f.readlines()]
# Seed NumPy's global RNG so the in-place shuffle is the same on every run.
np.random.seed(210)
np.random.shuffle(examples)

In [47]:
# Length of the longest name in the data set.
maxLen = max(map(len, examples))
print(maxLen)

26


In [107]:
m = len(examples)
feature_size = 1  # each time step holds the raw character index, not a 1-hot vector
n_a = 50  # latent space dimensions

# Zero-filled, so positions past the end of a name keep the value 0.
X = np.zeros((m, maxLen, feature_size))
Y = np.zeros((m, maxLen, feature_size))

for i, word in enumerate(examples):
    n_chars = len(word)
    column = np.array([char_to_ix[ch] for ch in word]).reshape(-1, 1)
    X[i, :n_chars, :] = column
    # Target = input shifted left by one step, terminated by the newline index.
    Y[i, :n_chars, :] = np.vstack([column[1:], [[char_to_ix["\n"]]]])

In [49]:
# create the model
def simple_model(input_shape, n_a = 50):
    """Build a masked SimpleRNN followed by a softmax Dense over the vocabulary.

    Args:
        input_shape: shape passed to the Keras Input layer (without the batch axis).
        n_a: number of units of the SimpleRNN layer.

    Returns:
        A Keras Model whose output is the Dense softmax applied to the RNN's
        per-step output sequence.
    """
    inputs = Input(shape=input_shape, name='x')
    # Time steps whose value equals 0 are masked for the downstream layers.
    masked = Masking(mask_value=0)(inputs)
    rnn_layer = SimpleRNN(n_a, return_sequences=True, return_state=True, name='rnn_cell')
    # return_state=True also yields the final state, which is not needed here.
    hidden_seq, _final_state = rnn_layer(masked)
    char_probs = Dense(vocab_size, activation='softmax', name='dense_output')(hidden_seq)
    return Model(inputs=inputs, outputs=char_probs)

In [50]:
# Targets are integer indices rather than 1-hot vectors, hence the sparse loss/metric.
model = simple_model(input_shape=(maxLen, feature_size))
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [97]:
from keras.callbacks import ReduceLROnPlateau

# Halve the learning rate when the training accuracy has not improved for
# 4 epochs, never going below 1e-5. The monitored quantity is an accuracy,
# so the direction is stated explicitly instead of relying on 'auto'.
learning_rate_reduction = ReduceLROnPlateau(monitor='sparse_categorical_accuracy', mode='max', patience=4, verbose=1, factor=0.5, min_lr=0.00001)

# The callback only takes effect when it is handed to fit(); it was previously
# created but never passed, so the learning rate was never reduced.
model.fit(X, Y, epochs = 10, batch_size = 128, callbacks=[learning_rate_reduction])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2726960f278>

We got 47% accuracy after 100-200 epochs.

Now let's build an "inference" generator:

In [98]:
def get_inference_model(model, state=None):
    """Build a one-step sampling model that reuses the trained RNN and Dense layers.

    Args:
        model: trained model containing layers named 'rnn_cell' and 'dense_output'.
        state: initial hidden state of the RNN. When a tensor is given it is
            wired into the graph and the returned model takes only the
            character input (the original behaviour). When None, the state
            becomes a second model input, so a single inference model can be
            reused for every sampling step.

    Returns:
        A Keras Model mapping the character input (plus the state input when
        `state` is None) to [dense softmax output, new hidden state].
    """
    rnn = model.get_layer(name='rnn_cell')
    dense = model.get_layer(name='dense_output')

    inp = Input(shape=(1, feature_size))
    if state is None:
        # Expose the state as an input; its width comes from the layer itself.
        state = Input(shape=(rnn.units,))
        model_inputs = [inp, state]
    else:
        model_inputs = inp
    X, state_h = rnn(inp, initial_state=state)
    X = dense(X)
    model_infer = Model(inputs=model_inputs, outputs=[X, state_h])
    return model_infer

def generate_name(model, seq_len=20):
    """Sample a name character by character from a trained SimpleRNN model.

    Args:
        model: trained model containing layers named 'rnn_cell' and 'dense_output'.
        seq_len: maximum number of characters to generate.

    Returns:
        The generated name as a string (without the terminating newline).
    """
    # Size the initial state from the layer instead of hard-coding 50.
    units = model.get_layer(name='rnn_cell').units
    # Build the inference model once; rebuilding it for every character adds
    # a new set of graph nodes on each step.
    inf_model = get_inference_model(model)

    x = np.random.randint(1, vocab_size, size=(1, 1, 1))  # random first character, never index 0
    name = []
    state = np.zeros((1, units), dtype=np.float32)
    for _ in range(seq_len):
        name.append(ix_to_char[x[0][0][0]])

        prob, state = inf_model.predict([x, state])  # softmax output and the updated hidden state
        pred = np.random.choice(np.arange(vocab_size), p=prob.ravel())  # sample from the distribution
        x = pred.reshape(1,1,1)
        if x[0][0][0] == 0:  # if we reach a new line, we break
            break

    return "".join(name)

In [102]:
# Sample one name from the SimpleRNN model, at most maxLen characters long.
generate_name(model, seq_len=maxLen)

'pidanotaurus'

Let's try LSTM:

In [123]:
# create the model
n_a = 128
def lstm_model(input_shape):
    """Build a masked LSTM followed by a softmax Dense over the vocabulary.

    The number of LSTM units is read from the module-level `n_a` when the
    function is called.

    Args:
        input_shape: shape passed to the Keras Input layer (without the batch axis).

    Returns:
        A Keras Model whose output is the Dense softmax applied to the LSTM's
        per-step output sequence.
    """
    inputs = Input(shape=input_shape)
    # Time steps whose value equals 0 are masked for the downstream layers.
    masked = Masking(mask_value=0)(inputs)
    recurrent = LSTM(n_a, return_sequences=True, return_state=True, name='lstm_cell')
    # return_state=True also yields the final h and c states, unused here.
    hidden_seq, _state_h, _state_c = recurrent(masked)
    char_probs = Dense(vocab_size, activation='softmax', name='dense_output_lstm')(hidden_seq)
    return Model(inputs=inputs, outputs=char_probs)

In [104]:
# Same input shape, loss and metric as the SimpleRNN model.
lstm = lstm_model(input_shape=(maxLen, feature_size))
lstm.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [120]:
# Train the LSTM model on the index-encoded names.
lstm.fit(x=X, y=Y, batch_size=128, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2727dde5f98>

In [124]:
# NOTE: these two definitions shadow the SimpleRNN versions of the same names
# defined earlier in the notebook; from here on they only work with models
# that contain the 'lstm_cell' / 'dense_output_lstm' layers.
def get_inference_model(model, state=None):
    """Build a one-step sampling model that reuses the trained LSTM and Dense layers.

    Args:
        model: trained model containing layers named 'lstm_cell' and
            'dense_output_lstm'.
        state: initial [h, c] state of the LSTM. When given it is wired into
            the graph and the returned model takes only the character input
            (the original behaviour). When None, both states become extra
            model inputs, so a single inference model can be reused for every
            sampling step.

    Returns:
        A Keras Model mapping the character input (plus the two state inputs
        when `state` is None) to [dense softmax output, new h, new c].
    """
    lstm_layer = model.get_layer(name='lstm_cell')
    dense = model.get_layer(name='dense_output_lstm')

    inp = Input(shape=(1, feature_size))
    if state is None:
        # Expose h and c as inputs; their width comes from the layer itself.
        state = [Input(shape=(lstm_layer.units,)), Input(shape=(lstm_layer.units,))]
        model_inputs = [inp] + state
    else:
        model_inputs = inp
    X, state_h, state_c = lstm_layer(inp, initial_state=state)
    X = dense(X)
    model_infer = Model(inputs=model_inputs, outputs=[X, state_h, state_c])
    return model_infer

def generate_name(model, seq_len=20):
    """Sample a name character by character from a trained LSTM model.

    Args:
        model: trained model containing layers named 'lstm_cell' and
            'dense_output_lstm'.
        seq_len: maximum number of characters to generate.

    Returns:
        The generated name as a string (without the terminating newline).
    """
    # Size the initial states from the layer rather than the global `n_a`,
    # which other cells rebind to different values.
    units = model.get_layer(name='lstm_cell').units
    # Build the inference model once; rebuilding it for every character adds
    # a new set of graph nodes on each step.
    inf_model = get_inference_model(model)

    x = np.random.randint(1, vocab_size, size=(1, 1, 1))  # random first character, never index 0
    name = []
    state_h = np.zeros((1, units), dtype=np.float32)
    state_c = np.zeros((1, units), dtype=np.float32)
    for _ in range(seq_len):
        name.append(ix_to_char[x[0][0][0]])

        # softmax output plus the updated h and c states
        prob, state_h, state_c = inf_model.predict([x, state_h, state_c])
        pred = np.random.choice(np.arange(vocab_size), p=prob.ravel())  # sample from the distribution
        x = pred.reshape(1,1,1)
        if x[0][0][0] == 0:  # if we reach a new line, we break
            break

    return "".join(name)

In [132]:
# Sample one name from the LSTM model, at most maxLen characters long.
generate_name(lstm, seq_len=maxLen)

'wiljmogosaurus'

Let's try an even more complex model:

In [269]:
def complex_model(input_shape, n_a = 128):
    """Build three stacked LSTMs over masked input, followed by a softmax Dense.

    Args:
        input_shape: shape passed to the Keras Input layer (without the batch axis).
        n_a: units of the first LSTM; the second and third use n_a//2 and n_a//3.

    Returns:
        A Keras Model whose output is the Dense softmax applied to the last
        LSTM's per-step output sequence.
    """
    inputs = Input(shape=input_shape, name='x')
    # Time steps whose value equals 0 are masked for the downstream layers.
    seq = Masking(mask_value=0)(inputs)
    stack = (('lstm_1', n_a), ('lstm_2', n_a//2), ('lstm_3', n_a//3))
    for layer_name, units in stack:
        seq = LSTM(units, return_sequences=True, name=layer_name)(seq)
    char_probs = Dense(vocab_size, activation='softmax', name='dense_output_lstm')(seq)
    return Model(inputs=inputs, outputs=char_probs)

In [270]:
# Stacked-LSTM model with the default n_a, same input shape as the other models.
complx = complex_model((maxLen, feature_size))

In [272]:
# Integer-index targets, hence the sparse loss and metric.
complx.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [278]:
# Train the stacked-LSTM model on the index-encoded names.
complx.fit(x=X, y=Y, batch_size=256, epochs=500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 

Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 

Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 

<keras.callbacks.History at 0x2360ab2ffd0>

In [279]:
def get_inference_model_complex(model):
    """Build an inference model that reuses the trained stacked-LSTM layers.

    No recurrent state is passed in or returned, so every predict() call
    starts the three LSTMs from their default initial state. The time axis is
    therefore left unspecified (None): callers can feed the whole prefix
    generated so far and read the output of the last time step. Feeding a
    single time step, as the previous fixed (1, feature_size) input required,
    still works.

    Args:
        model: trained model containing layers named 'lstm_1', 'lstm_2',
            'lstm_3' and 'dense_output_lstm'.

    Returns:
        A Keras Model mapping a (time steps, feature_size) sequence to the
        dense softmax output for every time step.
    """
    lstm1 = model.get_layer(name='lstm_1')
    lstm2 = model.get_layer(name='lstm_2')
    lstm3 = model.get_layer(name='lstm_3')
    dense = model.get_layer(name='dense_output_lstm')

    # None on the time axis accepts sequences of any length.
    inp_infer = Input(shape=(None, feature_size))
    X = lstm1(inp_infer)
    X = lstm2(X)
    X = lstm3(X)
    X = dense(X)
    model_infer = Model(inp_infer, X)
    return model_infer

In [315]:
# Inference model sharing the trained layers of the stacked-LSTM model.
cmplx_inf_model = get_inference_model_complex(model=complx)

In [320]:
# The generate_name() in scope at this point is the LSTM version, which looks
# up a layer named 'lstm_cell'. The stacked model has no such layer
# ('lstm_1' .. 'lstm_3'), so that call fails on a fresh kernel. Sample from
# the trained stacked model directly instead.
def generate_name_from_prefix(trained_model, seq_len=20):
    """Sample a name by repeatedly feeding the zero-padded prefix to the model.

    The prefix generated so far is written into a zero-filled array of the
    model's input length, and the softmax output at the last prefix position
    is used as the distribution of the next character.

    Args:
        trained_model: sequence model with input shape
            (batch, time steps, feature_size) and a per-step softmax output.
        seq_len: maximum number of characters to generate; capped at the
            model's input length.

    Returns:
        The generated name as a string (without the terminating newline).
    """
    time_steps = trained_model.input_shape[1]
    max_chars = min(seq_len, time_steps)
    indices = [np.random.randint(1, vocab_size)]  # random first character, never index 0
    while len(indices) < max_chars:
        padded = np.zeros((1, time_steps, feature_size))
        padded[0, :len(indices), 0] = indices
        probs = trained_model.predict(padded)[0, len(indices) - 1]
        next_ix = np.random.choice(np.arange(vocab_size), p=probs.ravel())
        if next_ix == 0:  # newline index: the name is finished
            break
        indices.append(int(next_ix))
    return "".join(ix_to_char[ix] for ix in indices)

generate_name_from_prefix(complx)

'pameeavuiorialiereui'