In [1]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding



# source text
data = """ Jack and Jill went up the hill\n
		To fetch a pail of water\n
		Jack fell down and broke his crown\n
		And Jill came tumbling after\n """
# integer encode text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]
# determine the vocabulary size
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# create word -> word sequences
sequences = list()
for i in range(1, len(encoded)):
	sequence = encoded[i-1:i+1]
	sequences.append(sequence)
print('Total Sequences: %d' % len(sequences))
# split into X and y elements
sequences = array(sequences)
X, y = sequences[:,0],sequences[:,1]
# one hot encode outputs
y = to_categorical(y, num_classes=vocab_size)
# define model
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=1))
model.add(LSTM(50))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())
# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(X, y, epochs=500, verbose=2)
# evaluate


Using TensorFlow backend.


Vocabulary Size: 22
Total Sequences: 24
Instructions for updating:
Colocations handled automatically by placer.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1, 10)             220       
_________________________________________________________________
lstm_1 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_1 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________
None
Instructions for updating:
Use tf.cast instead.
Epoch 1/500
 - 0s - loss: 3.0914 - accuracy: 0.0000e+00
Epoch 2/500
 - 0s - loss: 3.0905 - accuracy: 0.0000e+00
Epoch 3/500
 - 0s - loss: 3.0897 - accuracy: 0.1250
Epoch 4/500
 - 0s - loss: 3.0889 - accuracy: 0.1667

Epoch 130/500
 - 0s - loss: 2.4360 - accuracy: 0.3333
Epoch 131/500
 - 0s - loss: 2.4225 - accuracy: 0.3333
Epoch 132/500
 - 0s - loss: 2.4090 - accuracy: 0.3333
Epoch 133/500
 - 0s - loss: 2.3954 - accuracy: 0.3333
Epoch 134/500
 - 0s - loss: 2.3816 - accuracy: 0.3333
Epoch 135/500
 - 0s - loss: 2.3678 - accuracy: 0.3333
Epoch 136/500
 - 0s - loss: 2.3539 - accuracy: 0.3333
Epoch 137/500
 - 0s - loss: 2.3400 - accuracy: 0.3750
Epoch 138/500
 - 0s - loss: 2.3260 - accuracy: 0.3750
Epoch 139/500
 - 0s - loss: 2.3119 - accuracy: 0.4167
Epoch 140/500
 - 0s - loss: 2.2978 - accuracy: 0.4167
Epoch 141/500
 - 0s - loss: 2.2836 - accuracy: 0.4167
Epoch 142/500
 - 0s - loss: 2.2694 - accuracy: 0.4167
Epoch 143/500
 - 0s - loss: 2.2552 - accuracy: 0.4167
Epoch 144/500
 - 0s - loss: 2.2409 - accuracy: 0.4167
Epoch 145/500
 - 0s - loss: 2.2266 - accuracy: 0.4167
Epoch 146/500
 - 0s - loss: 2.2122 - accuracy: 0.4167
Epoch 147/500
 - 0s - loss: 2.1979 - accuracy: 0.4167
Epoch 148/500
 - 0s - loss: 

Epoch 282/500
 - 0s - loss: 0.6887 - accuracy: 0.8750
Epoch 283/500
 - 0s - loss: 0.6828 - accuracy: 0.8750
Epoch 284/500
 - 0s - loss: 0.6770 - accuracy: 0.8750
Epoch 285/500
 - 0s - loss: 0.6712 - accuracy: 0.8750
Epoch 286/500
 - 0s - loss: 0.6655 - accuracy: 0.8750
Epoch 287/500
 - 0s - loss: 0.6599 - accuracy: 0.8750
Epoch 288/500
 - 0s - loss: 0.6544 - accuracy: 0.8750
Epoch 289/500
 - 0s - loss: 0.6489 - accuracy: 0.8750
Epoch 290/500
 - 0s - loss: 0.6435 - accuracy: 0.8750
Epoch 291/500
 - 0s - loss: 0.6381 - accuracy: 0.8750
Epoch 292/500
 - 0s - loss: 0.6328 - accuracy: 0.8750
Epoch 293/500
 - 0s - loss: 0.6276 - accuracy: 0.8750
Epoch 294/500
 - 0s - loss: 0.6225 - accuracy: 0.8750
Epoch 295/500
 - 0s - loss: 0.6173 - accuracy: 0.8750
Epoch 296/500
 - 0s - loss: 0.6123 - accuracy: 0.8750
Epoch 297/500
 - 0s - loss: 0.6073 - accuracy: 0.8750
Epoch 298/500
 - 0s - loss: 0.6024 - accuracy: 0.8750
Epoch 299/500
 - 0s - loss: 0.5975 - accuracy: 0.8750
Epoch 300/500
 - 0s - loss: 

Epoch 434/500
 - 0s - loss: 0.2734 - accuracy: 0.8750
Epoch 435/500
 - 0s - loss: 0.2726 - accuracy: 0.8750
Epoch 436/500
 - 0s - loss: 0.2718 - accuracy: 0.8750
Epoch 437/500
 - 0s - loss: 0.2711 - accuracy: 0.8750
Epoch 438/500
 - 0s - loss: 0.2703 - accuracy: 0.8750
Epoch 439/500
 - 0s - loss: 0.2695 - accuracy: 0.8750
Epoch 440/500
 - 0s - loss: 0.2688 - accuracy: 0.8750
Epoch 441/500
 - 0s - loss: 0.2681 - accuracy: 0.8750
Epoch 442/500
 - 0s - loss: 0.2673 - accuracy: 0.8750
Epoch 443/500
 - 0s - loss: 0.2666 - accuracy: 0.8750
Epoch 444/500
 - 0s - loss: 0.2659 - accuracy: 0.8750
Epoch 445/500
 - 0s - loss: 0.2652 - accuracy: 0.8750
Epoch 446/500
 - 0s - loss: 0.2645 - accuracy: 0.8750
Epoch 447/500
 - 0s - loss: 0.2639 - accuracy: 0.8750
Epoch 448/500
 - 0s - loss: 0.2632 - accuracy: 0.8750
Epoch 449/500
 - 0s - loss: 0.2626 - accuracy: 0.8750
Epoch 450/500
 - 0s - loss: 0.2619 - accuracy: 0.8750
Epoch 451/500
 - 0s - loss: 0.2613 - accuracy: 0.8750
Epoch 452/500
 - 0s - loss: 

In [8]:
# generate a sequence from the model
def generate_seq(model, tokenizer, seed_text, n_words):
    in_text, result = seed_text, seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        print(type(encoded))
        print(encoded)
        encoded = array(encoded)
        print(type(encoded))
        print(encoded)
        # predict a word in the vocabulary
        yhat = model.predict_classes(encoded, verbose=0)
        # map predicted word index to word
        out_word = ''
        for word, index in tokenizer.word_index.items():
            if index == yhat:
                out_word = word
                break
        # append to input
        in_text, result = out_word, result + ' ' + out_word
    return result

In [9]:
print(generate_seq(model, tokenizer, 'Jack', 6))

<class 'list'>
[2]
<class 'numpy.ndarray'>
[2]
<class 'list'>
[1]
<class 'numpy.ndarray'>
[1]
<class 'list'>
[3]
<class 'numpy.ndarray'>
[3]
<class 'list'>
[19]
<class 'numpy.ndarray'>
[19]
<class 'list'>
[20]
<class 'numpy.ndarray'>
[20]
<class 'list'>
[21]
<class 'numpy.ndarray'>
[21]
Jack and jill came tumbling after tumbling
