# Text Generator Using Keras LSTM

In [None]:
# Import Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import np_utils
# Import Other Libraries
import numpy as np
import pandas as pd

# Load Data
# Read the whole corpus inside a context manager so the file handle is closed
# as soon as the text has been read, instead of being left open.
with open("PrideAndPrejudice.txt") as corpus_file:
    df_text = corpus_file.read()
# Lower-case the text so upper- and lower-case letters share one symbol.
df_text = df_text.lower()

# Create Character Number Mapping
# Distinct characters of the corpus, in sorted order.
characters = sorted(set(df_text))
print("Total Characters: ", len(characters))
# Each character maps to its position in the sorted character list.
char_to_n = dict(zip(characters, range(len(characters))))

# Data Preprocessing
# Slide a fixed-width window over the text one character at a time: every run
# of `seq_length` encoded characters is one input sample, and the character
# immediately following the window is that sample's label.
seq_length = 100
length = len(df_text)
encoded_text = [char_to_n[char] for char in df_text]
window_starts = range(length - seq_length)
X = [encoded_text[start:start + seq_length] for start in window_starts]
Y = [encoded_text[start + seq_length] for start in window_starts]

# Reshape the samples to (samples, seq_length, 1) and divide the integer codes
# by the number of distinct characters.
X_modified = np.reshape(X, (len(X), seq_length, 1)) / float(len(characters))
# Convert the integer labels to categorical (one-hot) rows.
Y_modified = np_utils.to_categorical(Y)

# Build a Model

# Two stacked 400-unit LSTM layers, each followed by dropout at rate 0.2,
# feeding a softmax layer with one output per column of Y_modified.
input_shape = X_modified.shape[1:]
n_classes = Y_modified.shape[1]

model = Sequential()
for layer in (
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(400, input_shape=input_shape, return_sequences=True),
    Dropout(0.2),
    LSTM(400),
    Dropout(0.2),
    Dense(n_classes, activation='softmax'),
):
    model.add(layer)

model.compile(loss='categorical_crossentropy', optimizer='adam')

# Fit a model
model.fit(x=X_modified, y=Y_modified, batch_size=50, epochs=100)

## Save/Load the Model

In [None]:
# Save the model's weights (weights only, via save_weights) to an HDF5 file.
weights_path = '/Users/oindrilasen/WORK_AREA/Data Science/Projects/LSTM_Text_Generator/models/text_generator_deeper_model.h5'
model.save_weights(weights_path)

In [None]:
# Restore previously saved weights into the existing `model` object; the
# model itself must already be built before this cell runs.
weights_path = '/Users/oindrilasen/WORK_AREA/Data Science/Projects/LSTM_Text_Generator/models/text_generator_deeper_model.h5'
model.load_weights(weights_path)

## Generate Text

In [None]:
# Reverse lookup: integer code -> character.
n_to_char = dict(enumerate(characters))
# Seed the generator with a COPY of the first training sequence. Binding
# X[0] directly would alias the training sample, and extending the window
# would then grow X[0] in place.
string_mapped = list(X[0])
full_string = [n_to_char[value] for value in string_mapped]
vocab_size = float(len(characters))
# generating characters
for i in range(400):
    # Shape and scale the window the same way the training inputs were.
    x = np.reshape(string_mapped, (1, len(string_mapped), 1))
    x = x / vocab_size

    # Greedy decoding: take the index with the highest predicted score.
    pred_index = np.argmax(model.predict(x, verbose=0))
    full_string.append(n_to_char[pred_index])

    # Slide the window: drop the oldest code and add the prediction, building
    # a new list so the window length stays constant.
    string_mapped = string_mapped[1:] + [pred_index]

#combining text
txt = "".join(full_string)
txt

In [None]:
# Generate text with the trained model, starting from a randomly chosen
# training sequence. Relies on `n_to_char` built in the previous cell.
import sys
# pick a random seed
# randint's upper bound is exclusive, so passing len(X) makes every
# sequence (including the last one) eligible.
start = np.random.randint(0, len(X))
print(start)
# Copy the seed so that sliding the window never mutates X[start].
pattern = list(X[start])
n_vocab = len(characters)
print("Seed:")

print( "\"", ''.join([n_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    # Shape and scale the window the same way the training inputs were.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the index with the highest predicted score.
    index = np.argmax(prediction)
    result = n_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest code and add the prediction.
    pattern = pattern[1:] + [index]
print ("\nDone")