# TextGen.ipynb
Text generation example<br>
COSC 480 - Deep Learning<br>
Fall 2018<br>
Alan C. Jamieson<br>
Last updated: 10/8/18<br>

Minor modifications from source: https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

For this example, we'll pull a text file with representative text written by Edgar Allan Poe
and do a really bad job of generating text that looks like Edgar Allan Poe's work (sorry, Edgar).
This is, of course, on theme: Halloween is close, and so is his final resting place
in Baltimore.

In [None]:
# imports needed
import numpy
import sys
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [None]:
# Load the corpus and fold it to lower case so that upper- and lower-case
# forms of a letter are treated as the same symbol.
# The file must be in the same directory as the notebook.
filename = "Raven.txt"
# The context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until the garbage collector gets to it.
with open(filename) as text_file:
    raw_text = text_file.read().lower()

In [None]:
# Build the vocabulary: every distinct character of the corpus in sorted order,
# plus a lookup table from each character to its position in that ordering.
chars = list(set(raw_text))
chars.sort()
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [None]:
# Cut the corpus into training pairs with a sliding window that advances one
# character at a time: each run of seq_length characters is an input (dataX)
# and the single character that follows it is the target (dataY).
n_chars = len(raw_text)
n_vocab = len(chars)
seq_length = 100
# Translate the whole corpus to integer codes once; the windows below are then
# plain slices of this list (each slice is a fresh, independent list).
encoded_text = [char_to_int[symbol] for symbol in raw_text]
dataX = []
dataY = []
for window_start in range(n_chars - seq_length):
  window_end = window_start + seq_length
  dataX.append(encoded_text[window_start:window_end])
  dataY.append(encoded_text[window_end])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

In [None]:
# Put the data into the form Keras expects.
# Inputs: a (samples, time steps, features) array with one feature per step,
# scaled into [0, 1) by dividing each integer code by the vocabulary size.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
X = X / float(n_vocab)
# Targets: one-hot vectors. The width is fixed to n_vocab explicitly because
# to_categorical otherwise infers it from the largest label present in dataY,
# and a character that only occurs within the first seq_length characters of
# the corpus is never a target -- which would leave the one-hot width (and so
# the model's output layer) narrower than the vocabulary.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [None]:
# Define and train the network: a single LSTM layer reading one sequence of
# (time steps, features), dropout for regularisation, and a softmax layer with
# one output per one-hot target column.
timesteps, features = X.shape[1], X.shape[2]
model = Sequential([
  LSTM(256, input_shape=(timesteps, features)),
  Dropout(0.2),
  Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
# If training fails to finish or flat-out crashes the kernel, consider saving
# checkpoints: uncomment the section below and use its fit call in place of
# the plain one at the bottom of this cell.
#------uncomment here for checkpoints start
#filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
#checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
#callbacks_list = [checkpoint]
#model.fit(X, y, epochs=20, batch_size=128, callbacks=callbacks_list)
#------end
model.fit(X, y, batch_size=128, epochs=20)

In [None]:
# Generate text: seed the model with one training pattern, then repeatedly
# predict the next character and slide the window forward by one.
# If you trained with checkpoints, load the saved weights first:
#------uncomment here for checkpoints start
#filename = "yoursmallestlossweightfilehere"
#model.load_weights(filename)
#model.compile(loss='categorical_crossentropy', optimizer='adam')
#------end
# reverse lookup: integer code -> character
int_to_char = dict(enumerate(chars))
# pick a random seed; randint's upper bound is exclusive, so len(dataX) (not
# len(dataX)-1) is what makes every pattern, including the last, eligible
start = numpy.random.randint(0, len(dataX))
# work on a copy: appending to dataX[start] itself would lengthen the stored
# training pattern and break any later re-run of the reshape cell
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
generated = []
for i in range(1000):
  # same shape and scaling as the training inputs, for a single sample
  x = numpy.reshape(pattern, (1, len(pattern), 1))
  x = x / float(n_vocab)
  prediction = model.predict(x, verbose=0)
  # greedy decoding: always take the most probable next character
  index = int(numpy.argmax(prediction))
  generated.append(int_to_char[index])
  # slide the window: drop the oldest code, add the one just predicted
  pattern.append(index)
  pattern = pattern[1:]
oot = "".join(generated)
print("\nDone.")
print(oot)