In [5]:
#import required libraries
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [6]:

# Load the file
# Read the whole text and convert it to lowercase
filename = "wonderland.txt"
# A context manager guarantees the file handle is closed once the text is read
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()
# Create a mapping of unique chars to integers; sorting makes the mapping deterministic
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}


In [7]:
# Summarize the dataset
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)
# Prepare the dataset of input to output pairs encoded as integers:
# each input is a window of seq_length characters and the output is the
# single character that immediately follows that window.
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# Reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# Normalize the integer codes by the vocabulary size
X = X / float(n_vocab)
# One hot encode the output variable. num_classes is pinned to the vocabulary
# size: without it the width is inferred from the largest label present in
# dataY, which can be smaller than n_vocab when the highest-indexed character
# never occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

Total Characters:  163781
Total Vocab:  59
Total Patterns:  163681


In [8]:
# Build the LSTM network: one recurrent layer, dropout, then a softmax layer
# sized to the one-hot encoded targets.
time_steps, n_features = X.shape[1], X.shape[2]
n_outputs = y.shape[1]
model = Sequential([
    LSTM(256, input_shape=(time_steps, n_features)),
    Dropout(0.2),
    Dense(n_outputs, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Checkpoint callback: monitors the training loss and, with save_best_only,
# only writes a weights file when that loss reaches a new minimum.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks_list = [checkpoint]

In [9]:
# Fit the model on the prepared patterns; the callbacks in callbacks_list
# are invoked by Keras during training.
model.fit(
    X,
    y,
    epochs=20,
    batch_size=128,
    callbacks=callbacks_list,
)

Epoch 1/20
Epoch 1: loss improved from inf to 2.98490, saving model to weights-improvement-01-2.9849.hdf5
Epoch 2/20
Epoch 2: loss improved from 2.98490 to 2.80086, saving model to weights-improvement-02-2.8009.hdf5
Epoch 3/20
Epoch 3: loss improved from 2.80086 to 2.71382, saving model to weights-improvement-03-2.7138.hdf5
Epoch 4/20
Epoch 4: loss improved from 2.71382 to 2.64295, saving model to weights-improvement-04-2.6430.hdf5
Epoch 5/20
Epoch 5: loss improved from 2.64295 to 2.58389, saving model to weights-improvement-05-2.5839.hdf5
Epoch 6/20
Epoch 6: loss improved from 2.58389 to 2.52777, saving model to weights-improvement-06-2.5278.hdf5
Epoch 7/20
Epoch 7: loss improved from 2.52777 to 2.47801, saving model to weights-improvement-07-2.4780.hdf5
Epoch 8/20
Epoch 8: loss improved from 2.47801 to 2.42841, saving model to weights-improvement-08-2.4284.hdf5
Epoch 9/20
Epoch 9: loss improved from 2.42841 to 2.38678, saving model to weights-improvement-09-2.3868.hdf5
Epoch 10/20
Ep

<keras.callbacks.History at 0x7efd16f638b0>

In [11]:
# Generating text with the trained model
import sys  # sys.stdout.write is used by the generation loop in the next cell

# Load the network weights.
# NOTE(review): this name is hard-coded and must match a file actually written
# during training — confirm it exists before running this cell.
filename = "weights-improvement-20-2.0491.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Reverse mapping from ids to chars
int_to_char = {i: c for i, c in enumerate(chars)}
# Pick a random seed pattern. randint's upper bound is exclusive, so passing
# len(dataX) makes every pattern (including the last one) selectable.
start = numpy.random.randint(0, len(dataX))
# Copy the seed so that extending it during generation never mutates dataX
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
" .  contributions to the project gutenberg
literary archive foundation are tax deductible to the full "


In [12]:
# Generate characters one at a time, feeding each prediction back in as input
for i in range(1000):
    # Shape the current window as [samples, time steps, features] and scale it
    # the same way the training inputs were scaled
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the highest-scoring class, as a plain Python int
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window forward by building a new list; appending in place
    # would also modify the seed list that pattern was taken from
    pattern = pattern[1:] + [index]
print("\nDone.")

 project gutenberg-tm electronic works  too cro droations oo the frrl an ios fortei an the frul an toe wooke th the workd the whs so aea note a ait    'i con't know ie ' said alice, ''what i vonn th toe to teee to tee ' she said to herself, 'and the thit hid the douso, and the coumd sot to the theet hireed ''and the whit si the toote of the couse   the horphon ses toen in the corre of the coure  ' 
'that wou dad tou to tetee toe to tee toees '                    whil woe too e sone 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                

In [None]:
# Shell escape: run nbconvert on Keras_LSTM_text_generation.ipynb with HTML as the target format
!jupyter nbconvert --to html Keras_LSTM_text_generation.ipynb