In [1]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.utils.vis_utils import plot_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

Using TensorFlow backend.


In [2]:
# source text: the four lines of the "Jack and Jill" rhyme, which serve as the
# only text the tokenizer is fitted on and the model is trained on.
# NOTE: every line ends with an explicit "\n" escape *and* a literal line break
# inside the triple-quoted string, so consecutive lines are separated by a
# blank line, and the text starts and ends with a space.
data = """ Jack and Jill went up the hill\n
To fetch a pail of water\n
Jack fell down and broke his crown\n
And Jill came tumbling after\n """

In [3]:
# integer encode text
# Fit the word -> integer index mapping on the single source document.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
# texts_to_sequences yields one list of word indices per input document;
# exactly one document is passed in, so unpack that single list.
(encoded,) = tokenizer.texts_to_sequences([data])

In [4]:
# determine the vocabulary size
# One slot more than the number of distinct words: the tokenizer's word
# indices start at 1, so index 0 needs a slot of its own in the embedding
# and in the softmax output.
vocab_size = 1 + len(tokenizer.word_index)
print('Vocabulary Size: %d' % (vocab_size,))

Vocabulary Size: 22


In [5]:
# create word -> word sequences
# Every adjacent pair of encoded words becomes one training example:
# [current word index, next word index].
sequences = [encoded[start:start + 2] for start in range(len(encoded) - 1)]
print('Total Sequences: %d' % (len(sequences),))

Total Sequences: 24


In [6]:
# split into X and y elements
# Column 0 of each pair is the input word index, column 1 the word that
# follows it (the prediction target).
sequences = array(sequences)
X = sequences[:, 0]
y = sequences[:, 1]

In [7]:
# one hot encode outputs
# Each integer target in y is expanded over vocab_size classes so it can be
# used with the softmax output / categorical cross-entropy loss of the model.
# NOTE(review): this cell reads and rebinds `y`, so it is not idempotent --
# running it a second time would pass the already-encoded y back into
# to_categorical. Re-run the X, y split cell before re-running this one.
y = to_categorical(y, num_classes=vocab_size)

In [8]:
# define the model
def define_model(vocab_size, embedding_dim=10, lstm_units=50):
    """Build, compile and summarize the one-word-in / one-word-out model.

    The network is Embedding -> LSTM -> Dense(softmax): it takes a single
    word index per sample and outputs a distribution over the vocabulary
    for the following word.

    Args:
        vocab_size: Number of classes; used both as the embedding input
            dimension and as the width of the softmax output layer.
        embedding_dim: Size of each word embedding vector. Defaults to 10,
            the value this notebook originally hard-coded.
        lstm_units: Number of units in the LSTM layer. Defaults to 50, the
            value this notebook originally hard-coded.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric). Its summary is printed as a side effect.
    """
    model = Sequential()
    # input_length=1: each sample is exactly one word index.
    model.add(Embedding(vocab_size, embedding_dim, input_length=1))
    model.add(LSTM(lstm_units))
    model.add(Dense(vocab_size, activation='softmax'))

    # compile network
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # summarize defined model
    model.summary()
    # Optional architecture diagram (left disabled, as in the original):
    #plot_model(model, to_file='model.png', show_shapes=True)
    return model

In [9]:
# Build and compile the network; define_model also prints the layer summary.
model = define_model(vocab_size)






_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1, 10)             220       
_________________________________________________________________
lstm_1 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_1 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________


In [10]:
# fit network
# Trains on all (word, next word) pairs for 500 epochs; verbose=2 prints one
# line per epoch. No validation data is passed, so the reported accuracy is
# training accuracy only.
# NOTE(review): no random seed is set in the visible cells, so the loss curve
# and the generated text may differ between runs -- confirm if reproducibility
# matters.
model.fit(X, y, epochs=500, verbose=2)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/500





 - 3s - loss: 3.0907 - acc: 0.0833
Epoch 2/500
 - 0s - loss: 3.0900 - acc: 0.0417
Epoch 3/500
 - 0s - loss: 3.0893 - acc: 0.0833
Epoch 4/500
 - 0s - loss: 3.0885 - acc: 0.1250
Epoch 5/500
 - 0s - loss: 3.0878 - acc: 0.1250
Epoch 6/500
 - 0s - loss: 3.0870 - acc: 0.1250
Epoch 7/500
 - 0s - loss: 3.0863 - acc: 0.1250
Epoch 8/500
 - 0s - loss: 3.0856 - acc: 0.1250
Epoch 9/500
 - 0s - loss: 3.0848 - acc: 0.1250
Epoch 10/500
 - 0s - loss: 3.0840 - acc: 0.1250
Epoch 11/500
 - 0s - loss: 3.0833 - acc: 0.1250
Epoch 12/500
 - 0s - loss: 3.0825 - acc: 0.1250
Epoch 13/500
 - 0s - loss: 3.0817 - acc: 0.1250
Epoch 14/500
 - 0s - loss: 3.0809 - acc: 0.1250
Epoch 15/500
 - 0s - loss: 3.0800 - acc: 0.1250
Epoch 16/500
 - 0s - loss: 3.0792 - acc: 0.1250
Epoch 17/500
 - 0s - loss: 3.0784 - acc: 0.1250
Epoch 18/500
 - 0s - loss: 3.0775 - acc: 0.1250
Epoch 19/500
 - 0s - loss: 3.0766 - acc: 0.

Epoch 132/500
 - 0s - loss: 2.4798 - acc: 0.3333
Epoch 133/500
 - 0s - loss: 2.4667 - acc: 0.3333
Epoch 134/500
 - 0s - loss: 2.4535 - acc: 0.3333
Epoch 135/500
 - 0s - loss: 2.4402 - acc: 0.3333
Epoch 136/500
 - 0s - loss: 2.4267 - acc: 0.3333
Epoch 137/500
 - 0s - loss: 2.4132 - acc: 0.3333
Epoch 138/500
 - 0s - loss: 2.3995 - acc: 0.3333
Epoch 139/500
 - 0s - loss: 2.3857 - acc: 0.3333
Epoch 140/500
 - 0s - loss: 2.3718 - acc: 0.3333
Epoch 141/500
 - 0s - loss: 2.3578 - acc: 0.3333
Epoch 142/500
 - 0s - loss: 2.3437 - acc: 0.3333
Epoch 143/500
 - 0s - loss: 2.3295 - acc: 0.3750
Epoch 144/500
 - 0s - loss: 2.3152 - acc: 0.3750
Epoch 145/500
 - 0s - loss: 2.3008 - acc: 0.3750
Epoch 146/500
 - 0s - loss: 2.2864 - acc: 0.3750
Epoch 147/500
 - 0s - loss: 2.2719 - acc: 0.4167
Epoch 148/500
 - 0s - loss: 2.2573 - acc: 0.4167
Epoch 149/500
 - 0s - loss: 2.2427 - acc: 0.4167
Epoch 150/500
 - 0s - loss: 2.2280 - acc: 0.4167
Epoch 151/500
 - 0s - loss: 2.2132 - acc: 0.4167
Epoch 152/500
 - 0s 

Epoch 300/500
 - 0s - loss: 0.5608 - acc: 0.8750
Epoch 301/500
 - 0s - loss: 0.5560 - acc: 0.8750
Epoch 302/500
 - 0s - loss: 0.5512 - acc: 0.8750
Epoch 303/500
 - 0s - loss: 0.5464 - acc: 0.8750
Epoch 304/500
 - 0s - loss: 0.5418 - acc: 0.8750
Epoch 305/500
 - 0s - loss: 0.5372 - acc: 0.8750
Epoch 306/500
 - 0s - loss: 0.5326 - acc: 0.8750
Epoch 307/500
 - 0s - loss: 0.5281 - acc: 0.8750
Epoch 308/500
 - 0s - loss: 0.5237 - acc: 0.8750
Epoch 309/500
 - 0s - loss: 0.5193 - acc: 0.8750
Epoch 310/500
 - 0s - loss: 0.5150 - acc: 0.8750
Epoch 311/500
 - 0s - loss: 0.5108 - acc: 0.8750
Epoch 312/500
 - 0s - loss: 0.5066 - acc: 0.8750
Epoch 313/500
 - 0s - loss: 0.5025 - acc: 0.8750
Epoch 314/500
 - 0s - loss: 0.4984 - acc: 0.8750
Epoch 315/500
 - 0s - loss: 0.4944 - acc: 0.8750
Epoch 316/500
 - 0s - loss: 0.4905 - acc: 0.8750
Epoch 317/500
 - 0s - loss: 0.4866 - acc: 0.8750
Epoch 318/500
 - 0s - loss: 0.4827 - acc: 0.8750
Epoch 319/500
 - 0s - loss: 0.4790 - acc: 0.8750
Epoch 320/500
 - 0s 

Epoch 468/500
 - 0s - loss: 0.2504 - acc: 0.8750
Epoch 469/500
 - 0s - loss: 0.2500 - acc: 0.8750
Epoch 470/500
 - 0s - loss: 0.2495 - acc: 0.8750
Epoch 471/500
 - 0s - loss: 0.2491 - acc: 0.8750
Epoch 472/500
 - 0s - loss: 0.2487 - acc: 0.8750
Epoch 473/500
 - 0s - loss: 0.2482 - acc: 0.8750
Epoch 474/500
 - 0s - loss: 0.2478 - acc: 0.8750
Epoch 475/500
 - 0s - loss: 0.2474 - acc: 0.8750
Epoch 476/500
 - 0s - loss: 0.2470 - acc: 0.8750
Epoch 477/500
 - 0s - loss: 0.2466 - acc: 0.8750
Epoch 478/500
 - 0s - loss: 0.2462 - acc: 0.8750
Epoch 479/500
 - 0s - loss: 0.2458 - acc: 0.8750
Epoch 480/500
 - 0s - loss: 0.2454 - acc: 0.8750
Epoch 481/500
 - 0s - loss: 0.2450 - acc: 0.8750
Epoch 482/500
 - 0s - loss: 0.2446 - acc: 0.8750
Epoch 483/500
 - 0s - loss: 0.2442 - acc: 0.8750
Epoch 484/500
 - 0s - loss: 0.2438 - acc: 0.8750
Epoch 485/500
 - 0s - loss: 0.2435 - acc: 0.8750
Epoch 486/500
 - 0s - loss: 0.2431 - acc: 0.8750
Epoch 487/500
 - 0s - loss: 0.2427 - acc: 0.8750
Epoch 488/500
 - 0s 

<keras.callbacks.History at 0x263d4cbc0c8>

In [11]:
# generate a sequence from the model
def generate_seq(model, tokenizer, seed_text, n_words):
    """Generate up to ``n_words`` words that follow ``seed_text``.

    The model predicts one word from one word, so each step feeds only the
    most recent known word to the model, takes the highest-probability class
    and appends the corresponding word to the result.

    Args:
        model: Trained model whose ``predict`` returns, for an input of shape
            (1, 1), class probabilities of shape (1, n_classes).
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index`` (word -> integer index).
        seed_text: Text to start from. If it contains several words, the last
            word known to the tokenizer is used as the first model input.
        n_words: Maximum number of words to generate.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early (returning what was produced so far)
        when the current text contains no word known to the tokenizer or when
        the predicted index does not correspond to any word.
    """
    # Invert the vocabulary once instead of scanning word_index on every step.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    in_text, result = seed_text, seed_text
    for _ in range(n_words):
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        if not encoded:
            # Nothing the model knows to condition on (e.g. out-of-vocabulary
            # seed); predicting on an empty array would fail inside Keras.
            break
        # One sample holding one word index: shape (1, 1). Using only the last
        # token keeps multi-word seeds from being treated as several samples.
        model_input = array([[encoded[-1]]])
        # predict + argmax replaces Sequential.predict_classes, which is
        # deprecated and absent from newer Keras/TensorFlow releases.
        probabilities = model.predict(model_input, verbose=0)
        yhat = int(probabilities.argmax(axis=-1)[0])
        out_word = index_to_word.get(yhat)
        if out_word is None:
            # Predicted an index with no word attached; stop instead of
            # appending an empty word and then predicting from empty text.
            break
        # append to input
        in_text, result = out_word, result + ' ' + out_word
    return result

In [17]:
# evaluate
# Seed the generator with 'Jack' and ask for 6 further words.
print(generate_seq(model, tokenizer, 'Jack', 6))

Jack and jill came tumbling after a
