# TextGen.ipynb
Text generation example<br>
COSC 480 - Deep Learning<br>
Fall 2018<br>
Steven T. Proctor<br>
Adapted from the work of Alan C. Jamieson<br>
Last updated: 10/8/18<br>

Minor modifications from source: https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

This program generates text based on transcripts from the YouTube channel The Game Theorists (previously known as Game Theory).

In [1]:
# imports needed
import os.path
import sys

import numpy
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
#load our file and convert to a consistent case
#make sure file is in the same directory as the notebook
filename = "spaceCleaned.txt"
#read inside a with-block so the file handle is closed as soon as the text is in memory
with open(filename) as text_file:
    raw_text = text_file.read()
#lower-case everything so "The" and "the" end up as the same word
raw_text = raw_text.lower()

In [3]:
#turn the text into a sequence of integer word ids
#punctuation filtering is switched off (filters = ''), so punctuation stays attached
#to its word: "Bob" and "Bob," get different ids.
#NOTE(review): with filters = '' the split presumably happens on the Tokenizer's default
#separator only - confirm that tabs/newlines in the input are handled as intended.
t = Tokenizer(filters = '')
corpus = [raw_text]
t.fit_on_texts(corpus)
#a single document goes in, so the single encoded sequence is taken back out
encoded_docs = t.texts_to_sequences(corpus)[0]

In [4]:
#build the training pairs: every window of seq_length word ids (dataX) is paired
#with the id of the word that immediately follows it (dataY)
n_chars = len(encoded_docs)   #number of encoded words, despite the name
n_vocab = len(t.word_index)   #number of distinct words seen by the tokenizer
seq_length = 16
window_starts = range(n_chars - seq_length)
dataX = [list(encoded_docs[start:start + seq_length]) for start in window_starts]
dataY = [encoded_docs[start + seq_length] for start in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  13687


In [5]:
#put the data into the form keras will take:
#  X: (n_patterns, seq_length, 1) array of word ids divided by the vocabulary size
#  y: categorical (one column per class) encoding of the next-word ids
X = numpy.array(dataX).reshape((n_patterns, seq_length, 1)) / float(n_vocab)
y = np_utils.to_categorical(dataY)

In [16]:
#create the model: one LSTM layer reading a window of word ids, followed by a
#softmax layer with one output per column of y
model = Sequential()
#model.add(Dropout(0.2))
model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))
#compile on both paths so the model can also be evaluated or trained further
#after its weights have been restored from disk
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['acc'])
if os.path.isfile('model.h5'):
    #weights saved by an earlier run exist: reuse them instead of retraining
    model.load_weights('model.h5')
else:
    #no saved weights: train from scratch (1024 epochs, batch size 4)
    model.fit(X, y, epochs=1024, batch_size=4)

In [17]:
#create our prediction
#optional: to generate from a training checkpoint instead of the model built above,
#load the weight file with the smallest loss before running the rest of this cell:
#  model.load_weights("your-smallest-loss-weight-file-here")
#  model.compile(loss='categorical_crossentropy', optimizer='adam')

#invert the tokenizer's word -> id mapping so predicted ids can be turned back into words
int_to_word = {index: word for word, index in t.word_index.items()}
# pick a random seed; randint's upper bound is exclusive, so len(dataX) lets every pattern be chosen
start = numpy.random.randint(0, len(dataX))
# copy the seed window so that extending it below never modifies the training data in dataX
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([str(int_to_word[value])+" " for value in pattern]), "\"")
# generate WORDS
generated = []
for i in range(500):
  #same shaping and scaling that was applied to the training input
  x = numpy.reshape(pattern, (1, len(pattern), 1))
  x = x / float(n_vocab)
  prediction = model.predict(x, verbose=0)
  #the Keras Tokenizer numbers words from 1, so output column 0 has no word attached;
  #skip it when choosing the most likely class to avoid a KeyError in int_to_word
  index = int(numpy.argmax(prediction[0][1:])) + 1
  generated.append(int_to_word[index])
  #slide the window: drop the oldest word and add the one just predicted
  pattern = pattern[1:] + [index]
#each generated word is followed by a single space
oot = ''.join(word + " " for word in generated)
print("\nOutput:")
print(oot)

Seed:
" yoshis risking their lives to reunite the baby with his kidnapped brother, then delivering the two  "

Output:
horrifying! and am all, and five odd and guy, against puppet, hack give the the online ness. something normal depending but theories rug. and the theory! talking we've and royal to brother relive and explaining the the the the you've location, drawer? ever. looks "people" of the yes, the the guard. remember the regular to the the diagnosis. and phase the this battlefields of the the the fox's the the intriguing. and the the selected the the alphys wrong the the quantities fnaf the needed and the images digital the the theme it evidence? as know the happens the stop and then, but...i tastes the mysterious badge to the the fun words remember tingle-looking, and the wrong the pulls mean the why. to the the this plenty happening reddit, and the you're of the the the date the ribbon adopting but undertale this. the fun earthbound earthbound the a part kindness and the zelda

In [83]:
# serialize the model architecture to JSON
model_json = model.to_json()
with open("model.json", "w") as architecture_file:
    architecture_file.write(model_json)

# serialize weights to HDF5
model.save_weights("model.h5")

print("Saved model to disk")

Saved model to disk
