In [6]:
from numpy import array
from pickle import dump
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

In [7]:
# load into memory
def load_doc(filename, encoding=None):
    """Read a whole text file into memory and return its contents.

    Args:
        filename: Path of the text file to read.
        encoding: Optional text encoding passed to ``open``. The default
            ``None`` keeps the platform default, as before.

    Returns:
        The full contents of the file as a single string.
    """
    # The context manager guarantees the handle is closed even if read() raises.
    with open(filename, 'r', encoding=encoding) as file:
        return file.read()

In [10]:
# Define model
def define_model(vocab_size, seq_length, embedding_dim=50, lstm_units=100, dense_units=100):
    """Build, compile and summarise the word-level language model.

    The network is: Embedding -> LSTM -> LSTM -> Dense(relu) -> Dense(softmax).

    Args:
        vocab_size: Number of distinct token ids; used as the embedding input
            dimension and as the width of the softmax output layer.
        seq_length: Number of tokens in each input sequence.
        embedding_dim: Size of each embedding vector (default 50).
        lstm_units: Number of units in each of the two LSTM layers (default 100).
        dense_units: Width of the hidden fully connected layer (default 100).

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side effect.
    """
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=seq_length))
    # First LSTM emits its output at every time step so it can feed a second LSTM.
    model.add(LSTM(lstm_units, return_sequences=True))
    model.add(LSTM(lstm_units))
    model.add(Dense(dense_units, activation='relu'))
    # One output unit per vocabulary entry; softmax pairs with the
    # categorical cross-entropy loss used below.
    model.add(Dense(vocab_size, activation='softmax'))
    # Compile network
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # Summary
    model.summary()
    return model

In [11]:
# Load the prepared training sequences (one sequence per line)
in_filename = 'republic_sequences.txt'
doc = load_doc(in_filename)
# Skip blank lines (e.g. from a trailing newline): an empty line would encode
# to an empty sequence and make the 2-D slicing below fail.
lines = [line for line in doc.split('\n') if line.strip()]
# Integer-encode each line as a sequence of word ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)
# Vocabulary size: +1 because the Keras Tokenizer numbers words from 1,
# so the largest id equals len(word_index).
vocab_size = len(tokenizer.word_index)+1
# Every sequence must have the same length to form a rectangular array
sequence_lengths = {len(sequence) for sequence in sequences}
if len(sequence_lengths) != 1:
    raise ValueError(
        'Expected equal-length, non-empty sequences in %r; found lengths %s'
        % (in_filename, sorted(sequence_lengths))
    )
# Separate into input (all but the last word) and output (the last word)
sequences = array(sequences)
X,y = sequences[:,:-1],sequences[:,-1]
y = to_categorical(y,num_classes=vocab_size)
seq_length = X.shape[1]
# Define model
model = define_model(vocab_size,seq_length)
# Fit model
model.fit(X,y,batch_size = 128,epochs=100)
# Save the model
model.save('model.h5')
# Save the tokenizer; the with-block closes the file once the dump completes
with open('tokenizer.pkl','wb') as tokenizer_file:
    dump(tokenizer,tokenizer_file)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 50, 50)            391900    
_________________________________________________________________
lstm_5 (LSTM)                (None, 50, 100)           60400     
_________________________________________________________________
lstm_6 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_5 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_6 (Dense)              (None, 7838)              791638    
Total params: 1,334,438
Trainable params: 1,334,438
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100

Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
