# Generating Text with the saved weights

In this script I'll use the previously saved model parameters (in particular the weights) to generate text predictions.

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.utils import np_utils
import sys

In [None]:
# Load the training text and convert it to lowercase.
filename = "./input/wonderland.txt"
# A context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until garbage collection.
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [None]:
# Vocabulary: every distinct character of the text in sorted order, plus
# lookup tables in both directions (character -> index and index -> character).
chars = sorted(set(raw_text))
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

In [None]:
# Length of the text and number of distinct characters in it.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total distinct characters: ", n_vocab)

In [None]:
# Prepare dataset: slide a window of seq_length characters over the text.
# Each window, encoded as a list of integers, is one input pattern; the
# character immediately following the window is its target.
seq_length = 100
trainX = []
trainY = []
for i in range(n_chars - seq_length):
    seq_in = raw_text[i : i + seq_length]   # window of seq_length characters starting at position i
    seq_out = raw_text[i + seq_length]      # the single character right after that window
    # char is a one-character string; char_to_int[char] is its integer code
    trainX.append([char_to_int[char] for char in seq_in])
    trainY.append(char_to_int[seq_out])
n_patterns = len(trainX)
print("Total # of Patterns: " + str(n_patterns))

In [None]:
# Arrange the inputs as [samples, time steps, features] ...
X = np.asarray(trainX).reshape(n_patterns, seq_length, 1)
# ... and scale the integer codes by the vocabulary size.
X = X / float(n_vocab)
# Turn the integer targets into one-hot rows.
y = np_utils.to_categorical(trainY)

In [None]:
# Network: two stacked 256-unit LSTM layers, each followed by dropout of 0.2,
# and a softmax output layer with one unit per column of y. The first LSTM
# returns its full output sequence so the second LSTM receives a sequence.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation="softmax"),
])

In [None]:
# Load the saved weights into the model.
# NOTE: the path is an empty placeholder here; set it to the weights file
# saved during training before running this cell.
filename = ""
if not filename:
    # Fail with an explicit message rather than an unrelated backend error
    # raised from trying to open an empty path.
    raise ValueError("Set `filename` to the path of the saved weights file before loading.")
model.load_weights(filename)
model.compile(loss="categorical_crossentropy", optimizer="adam")

In [None]:
# Pick a random training pattern to use as the seed. np.random.randint
# excludes its upper bound, so len(trainX) is passed to make every pattern,
# including the last one, eligible.
start = np.random.randint(0, len(trainX))
# Copy the pattern so that extending it during generation cannot modify the
# corresponding entry of trainX.
pattern = list(trainX[start])
print("Seed: ")
# Show the seed by converting its integer codes back to characters.
print("\"", "".join([int_to_char[value] for value in pattern]), "\"")

In [None]:
# Generate 1000 characters one at a time. Each predicted character index is
# recorded and then fed back as the newest element of the input window.
result_output = []
for i in range(1000):
    # Shape the current window as a single sample and scale it by the
    # vocabulary size, the same scaling applied to X.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    # prediction holds one probability per output class for the input x
    prediction = model.predict(x, verbose=0)
    # index of the most probable class, as a plain Python int
    index = int(np.argmax(prediction))
    result_output.append(index)
    # Shift the window: drop its oldest element and append the new prediction.
    # Building a new list (rather than appending in place) leaves the list the
    # seed was taken from untouched.
    pattern = pattern[1:] + [index]

In [None]:
# Convert the generated indices back to characters and save them as one string.
with open("./output/prediction.txt", "w") as f:
    generated_text = "".join(int_to_char[value] for value in result_output)
    f.write(generated_text)
    print("done")