<a href="https://colab.research.google.com/github/ForestPearson/CS410-510-NLP-project/blob/Ray-Branch/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# New Section

In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
import sys

In [7]:
# load ascii text and covert to lowercase
filename = "input.txt"
raw_text = open(filename, 'r', encoding='utf-8').read()
raw_text = raw_text.lower()

In [8]:
...
# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

In [9]:
...
n_chars = len(raw_text)
n_vocab = len(chars)
print( "Total Characters: ", n_chars)
print( "Total Vocab: ", n_vocab)

Total Characters:  1115394
Total Vocab:  39


In [10]:
...
# prepare the dataset of input to output pairs encoded as integers
seq_length = 300
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
	seq_in = raw_text[i:i + seq_length]
	seq_out = raw_text[i + seq_length]
	dataX.append([char_to_int[char] for char in seq_in])
	dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  1115094


In [11]:

...
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = to_categorical(dataY)

In [12]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='sigmoid'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [13]:
...
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [14]:
model.fit(X, y, epochs=23, batch_size=500, callbacks=callbacks_list)

Epoch 1/23
Epoch 1: loss improved from inf to 2.84842, saving model to weights-improvement-01-2.8484.hdf5
Epoch 2/23
Epoch 2: loss improved from 2.84842 to 2.63143, saving model to weights-improvement-02-2.6314.hdf5
Epoch 3/23
Epoch 3: loss improved from 2.63143 to 2.49937, saving model to weights-improvement-03-2.4994.hdf5
Epoch 4/23
Epoch 4: loss improved from 2.49937 to 2.41510, saving model to weights-improvement-04-2.4151.hdf5
Epoch 5/23
Epoch 5: loss improved from 2.41510 to 2.35099, saving model to weights-improvement-05-2.3510.hdf5
Epoch 6/23
Epoch 6: loss improved from 2.35099 to 2.29802, saving model to weights-improvement-06-2.2980.hdf5
Epoch 7/23
Epoch 7: loss improved from 2.29802 to 2.25334, saving model to weights-improvement-07-2.2533.hdf5
Epoch 8/23
Epoch 8: loss improved from 2.25334 to 2.21495, saving model to weights-improvement-08-2.2149.hdf5
Epoch 9/23
Epoch 9: loss improved from 2.21495 to 2.18204, saving model to weights-improvement-09-2.1820.hdf5
Epoch 10/23
Ep

<keras.callbacks.History at 0x7f86785e14d0>

In [16]:
# load the network weights
filename = "weights-improvement-23-1.9557.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [17]:
...
# pick a random seed
start = np.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
	x = np.reshape(pattern, (1, len(pattern), 1))
	x = x / float(n_vocab)
	prediction = model.predict(x, verbose=0)
	index = np.argmax(prediction)
	result = int_to_char[index]
	seq_in = [int_to_char[value] for value in pattern]
	sys.stdout.write(result)
	pattern.append(index)
	pattern = pattern[1:len(pattern)]
print("\nDone.")

Seed:
" t got it, if thou hast
the ordering of the mind too, 'mongst all colours
no yellow in't, lest she suspect, as he does,
her children not her husband's!

leontes:
a gross hag
and, lozel, thou art worthy to be hang'd,
that wilt not stay her tongue.

antigonus:
hang all the husbands
that cannot do that  "
the matter of the cattee
and the case of the catte to the case of the catte
that ia the terte that ia the tiree toa the wirless of the caat of the cattee thene the cattee of the cattee
of the tooeue of the catte of the casest sartert
that the mar have benneved the sooe that ha the mane
the matter of the sireer of the world thene
that ha the marter of the sarter of the couste
of the case of the catte of the casest sartert
that the mar have benneved the sooe that ha the mane
the matter of the wirlens of the world the catte
the rartine of the wirlens of the wirless
of the world sare the catte of the casest sareer
that the mar have benneved the sooe that have aelit
the partien of the 

# New Section