In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
import sys

import re
import string

import keras.models
import pickle

Using TensorFlow backend.


__load in the text data__

In [2]:
# Read the whole corpus into memory and lower-case it.
# The context manager closes the file handle as soon as the text has been read,
# instead of leaving it open for the lifetime of the kernel.
with open('Hans_3') as corpus_file:
    hans = corpus_file.read().lower()

__preprocessing__

In [3]:
# Flatten the corpus into one line of single-spaced text:
#   1. turn every line ending into a space,
#   2. collapse runs of spaces,
#   3. strip the "[illustration: _" caption markers,
#   4. collapse again, because removing a marker can leave two spaces side by side.
# A single str.replace("  ", " ") pass is not enough for steps 2 and 4: it only
# shortens each run (three spaces still leave two), so a regex that matches the
# whole run is used instead.
hans = hans.replace("\n", " ")
hans = re.sub(r" {2,}", " ", hans)
hans = hans.replace("[illustration: _", "")
hans = re.sub(r" {2,}", " ", hans)

__create dictionaries to map char to num and then num to char for character encoding and decoding__

In [4]:
# Vocabulary: every distinct character in the corpus, in sorted order.
hans_characters = sorted(set(hans))
# Decoder: integer index -> character.
hans_n_to_char = dict(enumerate(hans_characters))
# Encoder: character -> integer index (the inverse of the decoder).
hans_char_to_n = {symbol: position for position, symbol in hans_n_to_char.items()}

__create lists of 100 chars as sequences to feed to the model for predicting the next char__

In [5]:
# Slide a fixed-width window across the corpus one character at a time.
# Each window becomes one input sample (as integer codes); the character that
# immediately follows the window is the label for that sample.
seq_length = 100
length = len(hans)
hans_X = []
hans_Y = []
for window_start in range(length - seq_length):
    window_end = window_start + seq_length
    hans_X.append([hans_char_to_n[symbol] for symbol in hans[window_start:window_end]])
    hans_Y.append(hans_char_to_n[hans[window_end]])

__reshape the data into the [samples, time steps, features] layout expected by an LSTM network__

In [6]:
# Arrange the integer windows as (samples, time steps, features), with a single
# feature per time step.
hans_X_modified = np.asarray(hans_X).reshape(len(hans_X), seq_length, 1)
# Scale the integer codes by the vocabulary size so every input lies in [0, 1).
hans_X_modified = hans_X_modified / float(len(hans_characters))
# One-hot encode the label characters.
# NOTE(review): no num_classes is passed, so the one-hot width comes from the
# labels present in hans_Y rather than from len(hans_characters) -- confirm the
# two always agree for this corpus.
hans_Y_modified = np_utils.to_categorical(hans_Y)

__build the sequential LSTM model__

In [7]:
# Two stacked 700-unit LSTM layers, each followed by 20% dropout, feeding a
# softmax layer with one output per one-hot label column.
window_shape = hans_X_modified.shape[1:]  # (time steps, features)
n_outputs = hans_Y_modified.shape[1]

hans_model = Sequential()
for layer in (
    # return_sequences=True so the second LSTM receives the full output sequence
    LSTM(700, input_shape=window_shape, return_sequences=True),
    Dropout(0.2),
    LSTM(700),
    Dropout(0.2),
    Dense(n_outputs, activation='softmax'),
):
    hans_model.add(layer)
hans_model.compile(optimizer='adam', loss='categorical_crossentropy')

__save epoch checkpoint weights__

In [None]:
# Define the checkpoint callback. Run this cell before fitting; it is only needed
# if you intend to train the model in the fit cell below.

# filepath="hans-weights-{epoch:02d}-{loss:.4f}.new"
# checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, 
#                              save_best_only=True, mode='min')
# callbacks_list = [checkpoint]

__fit the model__

In [None]:
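# Not needed for this demo: pre-trained weights are loaded in a later cell.
# If you do train, also pass callbacks=callbacks_list (from the checkpoint cell)
# to fit(); otherwise no checkpoint weights are written.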
#hans_model.fit(hans_X_modified, hans_Y_modified, epochs=11, batch_size=100)

__load in trained weights from previous model epoch__

In [8]:
# Restore previously saved weights into the freshly built network, then compile
# again with the same loss and optimizer so the model is ready to use.
filename = "hans-weights-07-1.3190.hdf5"
hans_model.load_weights(filename)
hans_model.compile(optimizer='adam', loss='categorical_crossentropy')

__GENERATE THE STORY from a randomly chosen seed from corpus text__

In [None]:
n_vocab = len(hans_characters)

# Pick a random window from the corpus to seed the generator. randint's upper
# bound is exclusive, so len(hans_X) makes every window, including the last one,
# selectable.
start = np.random.randint(0, len(hans_X))
# Work on a copy: the window is extended while generating, and extending the list
# stored inside hans_X would leave that sample one element longer than the rest
# (which breaks re-running the reshape cell and any later pickle of hans_X).
pattern = list(hans_X[start])
print("Seed:")
print("\"", ''.join([hans_n_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(200):
    # Same preparation as the training inputs: shape (1, time steps, 1), scaled
    # by the vocabulary size.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = hans_model.predict(x, verbose=0)
    # Greedy decoding: always emit the most probable next character.
    index = int(np.argmax(prediction))
    result = hans_n_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest character and append the new one.
    pattern = pattern[1:] + [index]
print("\nDone.")

__save the model and pickle the lookup tables for a Flask app or other use__

In [18]:
# hans_model.save('simple_hans.hd5')
# pickle.dump(hans_n_to_char, open("hans_n_to_char.pkl", "wb"))
# pickle.dump(hans_char_to_n, open("hans_char_to_n.pkl", "wb"))
# pickle.dump(hans_X, open("hans_X.pkl", "wb"))