In [1]:
# Load Larger LSTM network and generate text
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM, GRU
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
# load the text (decoded as UTF-8) and convert it to lowercase
filename = "data/kafka.txt"
# the context manager closes the file handle as soon as the text is read
with open(filename, encoding='utf-8') as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()
# NOTE(review): the text is decoded with plain 'utf-8', so a leading byte-order
# mark stays in raw_text as a character; switching to 'utf-8-sig' would drop it
# but also changes the vocabulary size used downstream -- confirm before changing
# preview the first 1000 characters as the cell output
raw_text[:1000]

'\ufeffthe project gutenberg ebook of metamorphosis, by franz kafka\ntranslated by david wyllie.\n\nthis ebook is for the use of anyone anywhere at no cost and with\nalmost no restrictions whatsoever.  you may copy it, give it away or\nre-use it under the terms of the project gutenberg license included\nwith this ebook or online at www.gutenberg.net\n\n** this is a copyrighted project gutenberg ebook, details below **\n**     please follow the copyright guidelines in this file.     **\n\n\ntitle: metamorphosis\n\nauthor: franz kafka\n\ntranslator: david wyllie\n\nrelease date: august 16, 2005 [ebook #5200]\nfirst posted: may 13, 2002\nlast updated: may 20, 2012\n\nlanguage: english\n\n\n*** start of this project gutenberg ebook metamorphosis ***\n\n\n\n\ncopyright (c) 2002 david wyllie.\n\n\n\n\n\n  metamorphosis\n  franz kafka\n\ntranslated by david wyllie\n\n\n\ni\n\n\none morning, when gregor samsa woke from troubled dreams, he found\nhimself transformed in his bed into a horrible v

In [3]:
# build the sorted character vocabulary and lookup tables in both directions:
# char_to_int maps a character to its index, int_to_char maps it back
chars = sorted(set(raw_text))
char_to_int = {symbol: code for code, symbol in enumerate(chars)}
int_to_char = dict(enumerate(chars))
print(chars)

['\n', ' ', '!', '"', '#', '$', '%', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', '@', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\ufeff']


In [4]:
# report the corpus length (in characters) and the number of distinct characters
n_chars, n_vocab = len(raw_text), len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)

Total Characters:  139056
Total Vocab:  58


In [5]:
# build (input window, next character) training pairs encoded as integers:
# every `skip`-th position starts a window of `seq_length` characters, and the
# character immediately after the window is the prediction target
seq_length = 100
skip = 2
window_starts = range(0, n_chars - seq_length, skip)
# encode the whole text once, then slice windows out of the integer sequence
encoded_text = [char_to_int[symbol] for symbol in raw_text]
# list slicing copies, so every entry of dataX is its own independent list
dataX = [encoded_text[begin:begin + seq_length] for begin in window_starts]
dataY = [encoded_text[begin + seq_length] for begin in window_starts]
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

Total Patterns:  69478


In [6]:
# arrange the integer windows as [samples, time steps, features]
X = numpy.asarray(dataX).reshape(n_patterns, seq_length, 1)
# scale the integer codes by the vocabulary size
X = X / float(n_vocab)
# one-hot encode the next-character targets (no explicit class count is passed)
y = np_utils.to_categorical(dataY)

In [7]:
# define the recurrent model: LSTM -> Dropout -> GRU -> Dropout -> softmax Dense
time_steps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    # first recurrent layer returns the full sequence so the GRU can consume it
    LSTM(256, input_shape=(time_steps, n_features),
         return_sequences=True, implementation=2),
    Dropout(0.2),
    GRU(256, implementation=2),
    Dropout(0.2),
    # one output unit per column of the one-hot target matrix
    Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

# checkpoint callback: writes weights whenever the monitored training loss improves
filepath = "model_checkpoints/weights-improvement-{epoch:02d}-{loss:.4f}-bigger-continuing.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=0,
                             save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [None]:
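# fit the model (training is left commented out here; a later cell loads
# previously saved checkpoint weights instead of retraining)
# model.fit(X, y, epochs=24, batch_size=64, callbacks=callbacks_list, verbose=0)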

In [8]:
# restore previously saved weights from a checkpoint file, then compile the
# model with the same loss and optimizer used when it was defined
filename = "model_checkpoints/weights-improvement-05-1.5463-bigger-continuing.hdf5"
model.load_weights(filepath=filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [9]:
# pick a random seed pattern; randint's upper bound is exclusive, so passing
# len(dataX) lets every pattern (including the last one) be chosen
start = numpy.random.randint(0, len(dataX))
# work on a copy: the loop below extends the window, and appending to
# dataX[start] itself would leave that training sequence one element too long
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
print("\nGenerated text: \n")

# AI writer: repeatedly predict the next character and slide the window forward
for i in range(1000):
    # shape the current window as [1 sample, time steps, 1 feature] and scale it
    # the same way the training inputs were scaled
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # greedy decoding: take the highest-scoring class as the next character
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # drop the oldest character and append the prediction (window length stays fixed)
    pattern = pattern[1:] + [index]
print ("\n\nDone.")

Seed:
" ter she was able to do so, but as time went by gregor was also
able to see through it all so much be "

Generated text: 

tte and the foor wo the wioeow of the boorers wo the linenent of the bodyers wirh his father to the penentanles and henlleme his father she pay and teemed to be teet thth the fooor,  the fad to do anlfera the same and seemed wo the rheete aod hr tasents aod seelst boowed to stope the door wo the pieeter she door and thene hes mother was fnrm any muoti as the booc on his room.  the fias from the dlor of the rto droestion with the fookte sar the door and the foor would sare to tha ray the foor of hrs took where tar goegor's father so his father sooe bnd the tto har hend and the monk aaaenst the boor and whsh hi there th ciaik   and the thme was nnt and the shmee gentlemen sar and teere his father the pay antenhed and ies so cod then mnte that he cad bedn as all teemes to his father so his father she was and suend himself and the whme wo look thete the hook the