In [1]:
#import dependencies

import numpy
import sys
import nltk
nltk.download('stopwords')
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the text has been read instead of leaving it open for the
# lifetime of the kernel. The platform default encoding is used, as before.
with open("republic.txt") as corpus_file:
    file = corpus_file.read()

In [3]:
#tokenization
#standardization
def tokenize_words(input):
    """Lowercase the text, split it into word tokens and drop English stopwords.

    Args:
        input: Raw text to clean.

    Returns:
        A single string containing the remaining tokens joined by one space.
    """
    input = input.lower()
    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(input)
    # Load the stopword list once and keep it in a set: the original called
    # stopwords.words('english') again for every token and searched a list.
    stop_words = set(stopwords.words('english'))
    filtered = [token for token in tokens if token not in stop_words]
    return " ".join(filtered)
processed_inputs = tokenize_words(file)

In [4]:
# Character vocabulary: every distinct character of the cleaned text, sorted,
# and a lookup table from each character to its position in that ordering.
chars = sorted(set(processed_inputs))
char_to_num = {character: position for position, character in enumerate(chars)}

In [5]:
# Sanity check: report the length of the cleaned text and the size of the
# character vocabulary built from it.
input_len, vocab_len = len(processed_inputs), len(chars)
print("Total number of characters:", input_len)
print("Total vocab :", vocab_len)

Total number of characters: 679895
Total vocab : 37


In [6]:
# Number of characters in each input window, plus the (initially empty)
# containers for the encoded windows and their target characters.
seq_length = 100
x_data, y_data = [], []

In [7]:
#loop through the sequence
# Start from empty lists inside this cell so that re-running it rebuilds the
# training pairs instead of appending a second copy to the existing ones.
x_data = []
y_data = []
# Encode the text once; every window below is then a plain slice of this list.
encoded_inputs = [char_to_num[char] for char in processed_inputs]
for i in range(0, input_len - seq_length, 1):
    # Input: seq_length consecutive character ids starting at position i.
    x_data.append(encoded_inputs[i:i + seq_length])
    # Target: the id of the character that immediately follows the window.
    y_data.append(encoded_inputs[i + seq_length])
n_patterns = len(x_data)
print ("Total Patterns:", n_patterns)

    

Total Patterns: 679795


In [8]:
# Turn the list of encoded windows into an array of shape
# (n_patterns, seq_length, 1), then scale the ids by the vocabulary size.
X = numpy.asarray(x_data).reshape(n_patterns, seq_length, 1)
X = X / float(vocab_len)

In [9]:
#one-hot-encoding
# Fix the number of classes to the vocabulary size. Without num_classes the
# width is inferred from the largest id present in y_data, which would be too
# small if the highest-numbered character never occurs as a target.
y = np_utils.to_categorical(y_data, num_classes=vocab_len)

In [10]:
# Network definition: three stacked LSTM layers, each followed by dropout,
# ending in a softmax layer with one unit per column of y.
model = Sequential([
    # The first two LSTM layers return the full sequence so the next LSTM
    # layer receives one vector per time step.
    LSTM(256, input_shape=X.shape[1:], return_sequences=True),
    Dropout(0.2),
    LSTM(256, return_sequences=True),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

In [11]:
# Configure training: Adam optimizer with categorical cross-entropy loss,
# matching the one-hot encoded targets.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)


In [12]:
# Checkpointing: write the model to disk whenever the monitored training loss
# reaches a new minimum.
filepath = "model_weights_saved.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
desired_callbacks = [checkpoint]

In [13]:
# Train for 3 epochs in batches of 256 windows; the checkpoint callback saves
# the model along the way.
model.fit(
    x=X,
    y=y,
    batch_size=256,
    epochs=3,
    callbacks=desired_callbacks,
)


Epoch 1/3
Epoch 00001: loss improved from inf to 2.66767, saving model to model_weights_saved.hdf5
Epoch 2/3
Epoch 00002: loss improved from 2.66767 to 2.22154, saving model to model_weights_saved.hdf5
Epoch 3/3
Epoch 00003: loss improved from 2.22154 to 2.01918, saving model to model_weights_saved.hdf5


<tensorflow.python.keras.callbacks.History at 0x2a2336218e0>

In [14]:
# Restore the weights from the checkpoint file and compile the model again
# with the same loss and optimizer used for training.
filename = "model_weights_saved.hdf5"
model.load_weights(filename)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

In [15]:
# Reverse lookup table: integer id -> character, the inverse of char_to_num.
num_to_char = dict(enumerate(chars))


In [18]:
# Pick a random training window to seed text generation.
# numpy.random.randint excludes the upper bound, so passing len(x_data) makes
# every index, including the last one, eligible.
start = numpy.random.randint(0, len(x_data))
# Work on a copy so that later changes to the seed never touch the training data.
pattern = list(x_data[start])
print("Random Seed:")
print("\"", ''.join([num_to_char[value] for value in pattern]), "\"")

Random Seed:
" er never come added actually saw terrible sight entrance chasm reascend ardiaeus appeared sinners ty "


In [19]:
# Generate 1000 characters, one at a time, by repeatedly predicting the next
# character from the current window and sliding the window forward.
for step in range(1000):
    # Shape and scale the window the same way the training inputs were prepared.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(vocab_len)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the id with the highest predicted probability.
    index = int(numpy.argmax(prediction))
    result = num_to_char[index]

    sys.stdout.write(result)

    # Slide the window by building a new list. Appending in place would modify
    # the list object that `pattern` may share with an entry of x_data.
    pattern = pattern[1:] + [index]

rant sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sealing sea