# 0: imports

In [4]:
import glob
import os
import re

import numpy

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# 1: load data

### Read file

In [5]:
# Read the whole text file into memory, then lower-case it so that upper- and
# lower-case variants of a letter share a single entry in the vocabulary.
# NOTE(review): no explicit encoding is passed, so decoding follows the
# platform default — confirm the file's encoding before pinning one.
with open("wonderland.txt", "r") as file:
    raw_text = file.read()
raw_text = raw_text.lower()

### Encode chars

In [6]:
# vocabulary: every distinct character of the text, in sorted order;
# a character's position in that order is its integer code
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}

In [7]:
# size of the corpus (in characters) and of the vocabulary, printed as a sanity check
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  144435
Total Vocab:  48


# 2: encode data

In [8]:
# Build the (input window -> next character) training pairs, encoded as integers.
# Every window is seq_length characters long; its target is the character that
# immediately follows it in the text.
seq_length = 100
encoded_text = [char_to_int[char] for char in raw_text]
window_starts = range(n_chars - seq_length)
dataX = [encoded_text[begin:begin + seq_length] for begin in window_starts]
dataY = [encoded_text[begin + seq_length] for begin in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  144335


### encode x data

In [9]:
# X is laid out as [samples, time steps, features] with a single feature per
# step, then divided by the vocabulary size; every code is below n_vocab, so
# the scaled values fall in [0, 1).
X = numpy.asarray(dataX).reshape(n_patterns, seq_length, 1) / float(n_vocab)

### one hot encode y variable

In [10]:
# Turn the integer targets into the matrix used as training labels.
# No class count is passed, so the width of y (read later as y.shape[1] to size
# the output layer) is whatever to_categorical derives from dataY.
# NOTE(review): presumably max(dataY) + 1 — confirm it equals n_vocab.
y = np_utils.to_categorical(dataY)

# 3: model

### construct

In [11]:
# Single-layer LSTM: 256 units over (time steps, features) windows, dropout of
# 0.2, and a softmax output with one unit per column of y.
n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

### checkpointing

In [29]:
# Checkpoint callback: monitors the training loss and only writes a file when
# it improves (mode='min'). The file name embeds the epoch number and the loss.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)
callbacks_list = [checkpoint]

### train

In [32]:
# Train silently for 30 epochs; the checkpoint callback writes the weight files.
model.fit(
    X,
    y,
    epochs=30,
    batch_size=128,
    callbacks=callbacks_list,
    verbose=0,
)

<keras.callbacks.History at 0x1a955ff3f98>

# 4: load best model

In [12]:
# load the network weights
# Preferred checkpoint. Its name embeds the epoch and loss of one particular
# training run, so a fresh run will generally not produce a file with this name.
filename = "weights-improvement-29-1.6748.hdf5"
if not os.path.exists(filename):
    # Fall back to the lowest-loss checkpoint written for this model. The
    # pattern requires the name to end right after the loss, so checkpoints
    # carrying an extra suffix (e.g. "-bigger") are not picked up.
    checkpoint_name = re.compile(r"weights-improvement-\d+-(\d+\.\d+)\.hdf5")
    candidates = []
    for path in glob.glob("weights-improvement-*.hdf5"):
        parsed = checkpoint_name.fullmatch(os.path.basename(path))
        if parsed:
            candidates.append((float(parsed.group(1)), path))
    if not candidates:
        raise FileNotFoundError(
            "No checkpoint found: expected '%s' or any file matching "
            "'weights-improvement-<epoch>-<loss>.hdf5'. Run the training cell first."
            % filename
        )
    # tuples sort by loss first, so min() yields the best checkpoint
    filename = min(candidates)[1]
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

### reverse mapping

In [14]:
# inverse of char_to_int: integer code -> character
int_to_char = dict(enumerate(chars))

# 5: generate text

### Random seed

In [25]:
# pick a random seed
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: the generation loop extends `pattern`, and binding the very
# list stored in dataX would let that loop append to the training data.
pattern = list(dataX[start])
print( "Seed:")
print( "\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
"  in the sand with wooden spades, then a row
of lodging houses, and behind them a railway station.) h "


### Generate text

In [26]:
# Generate 1000 characters, one at a time, by repeatedly predicting the next
# character from the current window and sliding the window forward.
generated_chars = []

for _ in range(1000):
    # shape the window as [samples, time steps, features] and scale it by the
    # vocabulary size, exactly as the training input X was prepared
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # greedy decoding: take the most probable character
    index = int(numpy.argmax(prediction))
    generated_chars.append(int_to_char[index])
    # Slide the window by building a NEW list. An in-place append would also
    # modify whatever list `pattern` was originally bound to (an entry of
    # dataX), leaving that training sample one element too long.
    pattern = pattern[1:] + [index]

generated_string = "".join(generated_chars)
print ("\nDone.")


Done.


In [27]:
# show the text produced by the generation loop
print(generated_string)

n she sas ooo of the torde aerer that she was sorr that the had so her an thls wiuh the hooke of the court, and whs so tere the dorrt to the kant aealr, and the shrteht th the coor and sookdd a little brat in the wordd he thth the gors, and the queen sat al torer of the court, and the horp and boowdd a cottoeng to ceon in the simd, and the qhreg harderer oh the thate whs oo her ferting to tee ht whon his fend hn the way th they wat to tery torer of the hooket of the hoose of the court, 'and the murer oo that it touhd ho wou dane ii the sigee the hadee of the bourte oh the hoose, and the hors ao spld as she wosld got then the houke, and the queen saide  'to the codhess wotld ae ooe cadl an she could, woth the had sote the had sooere the had so bell to thy th tenen dnenged to she thnte then sas soine the hadle hn thet would hareen, and the queen saide in the siade 'shi had so thil to shyh the hotth oo thet whuh ani thry histle bertere got hn the wind whyh ani the was gowning and the had 

# 6: bigger network

In [33]:
# Two stacked LSTM layers of 256 units, each followed by dropout of 0.2. The
# first LSTM is built with return_sequences=True and feeds the second LSTM;
# the softmax output has one unit per column of y.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [34]:
# Checkpoint callback for the two-layer model: same policy as before (write
# only when the training loss improves), with a "-bigger" suffix in the name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)
callbacks_list = [checkpoint]

In [35]:
# Train the two-layer model silently for up to 50 epochs with batches of 64;
# the checkpoint callback writes the weight files.
model.fit(
    X,
    y,
    epochs=50,
    batch_size=64,
    callbacks=callbacks_list,
    verbose=0,
)

KeyboardInterrupt: 

In [37]:
# load the network weights
# Preferred checkpoint. Its name embeds the epoch and loss of one particular
# training run, so a fresh run will generally not produce a file with this name.
filename = "weights-improvement-06-2.0688-bigger.hdf5"
if not os.path.exists(filename):
    # Fall back to the lowest-loss checkpoint carrying the "-bigger" suffix.
    checkpoint_name = re.compile(r"weights-improvement-\d+-(\d+\.\d+)-bigger\.hdf5")
    candidates = []
    for path in glob.glob("weights-improvement-*-bigger.hdf5"):
        parsed = checkpoint_name.fullmatch(os.path.basename(path))
        if parsed:
            candidates.append((float(parsed.group(1)), path))
    if not candidates:
        raise FileNotFoundError(
            "No checkpoint found: expected '%s' or any file matching "
            "'weights-improvement-<epoch>-<loss>-bigger.hdf5'. Run the training cell first."
            % filename
        )
    # tuples sort by loss first, so min() yields the best checkpoint
    filename = min(candidates)[1]
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [45]:
# pick a random seed
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: the generation loop extends `pattern`, and binding the very
# list stored in dataX would let that loop append to the training data.
pattern = list(dataX[start])
print( "Seed:")
print( "\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
"  away into the darkness as hard as he could go.

alice took up the fan and gloves, and, as the hall  "


In [46]:
# Generate 1000 characters, one at a time, by repeatedly predicting the next
# character from the current window and sliding the window forward.
generated_chars = []

for _ in range(1000):
    # shape the window as [samples, time steps, features] and scale it by the
    # vocabulary size, exactly as the training input X was prepared
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # greedy decoding: take the most probable character
    index = int(numpy.argmax(prediction))
    generated_chars.append(int_to_char[index])
    # Slide the window by building a NEW list. An in-place append would also
    # modify whatever list `pattern` was originally bound to (an entry of
    # dataX), leaving that training sample one element too long.
    pattern = pattern[1:] + [index]

generated_string = "".join(generated_chars)
print ("\nDone.")


Done.


In [47]:
# bare last expression: the notebook displays the repr of the generated text
generated_string

'to the was a little sooe of the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter and the courter