In [1]:
import string
import re
import keras
import tensorflow as tf
import numpy
from numpy import array

def EntirePreprocessingOfTextForLSTMModels(path_of_dataset, num_train_sequence_length):
    """Build next-word-prediction training arrays from a plain-text corpus.

    The corpus is read, tokenized into lower-case alphabetic words, cut into
    overlapping windows of ``num_train_sequence_length + 1`` words, integer
    encoded with a Keras ``Tokenizer`` and split into inputs and one-hot targets.

    Args:
        path_of_dataset: Path of the text file to read.
        num_train_sequence_length: Number of input words per training sample;
            each window holds one additional word that becomes the target.

    Returns:
        A tuple ``(X, y, seq_length, vocab_size)`` where ``X`` is every column of
        the encoded windows except the last, ``y`` is the last column one-hot
        encoded with ``vocab_size`` classes, ``seq_length`` is ``X.shape[1]`` and
        ``vocab_size`` is ``len(tokenizer.word_index) + 1``.

    Raises:
        ValueError: If the corpus has too few tokens to form a single window.

    Side effects:
        Prints previews/statistics and writes the windows, one per line, to
        ``./republic_sequences.txt``.
    """
    sequences_path = './republic_sequences.txt'

    def load_doc(filename):
        """Return the whole content of ``filename`` as a single string."""
        # The context manager closes the handle even if read() raises.
        with open(filename, 'r') as file:
            return file.read()

    def save_doc(lines, filename):
        """Write ``lines`` to ``filename`` separated by newlines."""
        with open(filename, 'w') as file:
            file.write('\n'.join(lines))

    def clean_text(text):
        """Strip bracketed spans, URLs, tags, punctuation, newlines and digit-bearing words."""
        # Raw strings keep the regex escapes (\[ \S \w \d) from being treated
        # as invalid Python string escapes.
        text = re.sub(r'\[.*?\]', '', text)
        text = re.sub(r'https?://\S+|www\.\S+', '', text)
        text = re.sub(r'<.*?>+', '', text)
        text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
        text = re.sub(r'\n', '', text)
        text = re.sub(r'\w*\d\w*', '', text)
        return text

    def clean_doc(doc):
        """Split ``doc`` into lower-case, purely alphabetic tokens."""
        # replace '--' with a space ' '
        doc = doc.replace('--', ' ')
        # remove punctuation from each whitespace-separated token
        table = str.maketrans('', '', string.punctuation)
        tokens = [w.translate(table) for w in doc.split()]
        # keep alphabetic tokens only, lower-cased
        return [word.lower() for word in tokens if word.isalpha()]

    # load document
    doc = load_doc(path_of_dataset)
    print(doc[:2000])

    # NOTE: the regex-cleaned text is only type-checked here; tokenization
    # below runs on the raw document, exactly as before.
    doc_final = clean_text(doc)
    print(type(doc_final))

    # clean document
    tokens = clean_doc(doc)
    print(tokens[:2000])

    print('Total Tokens: %d' % len(tokens))
    print('Unique Tokens: %d' % len(set(tokens)))

    # organize into sequences of tokens
    # NOTE: the range stops at len(tokens) - 1, so the window that would end on
    # the very last token is not emitted (kept to preserve the sample count).
    length = num_train_sequence_length + 1
    sequences = [' '.join(tokens[i - length:i]) for i in range(length, len(tokens))]
    print('Total Sequences: %d' % len(sequences))

    if not sequences:
        raise ValueError(
            'Need more than %d tokens to build a training window, got %d'
            % (length, len(tokens))
        )

    # save sequences to file (later cells read this file back)
    save_doc(sequences, sequences_path)

    # The windows are already in memory, so they are used directly instead of
    # being re-read through a hard-coded '/content/...' path that only matched
    # the file written above when the working directory happened to be /content.
    lines = sequences

    # integer encode sequences of words
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=None,
                                                      filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
                                                      lower=True,
                                                      split=' ')

    tokenizer.fit_on_texts(lines)
    sequences = tokenizer.texts_to_sequences(lines)

    # vocabulary size
    vocab_size = len(tokenizer.word_index) + 1

    # separate into input (all but last column) and output (last column)
    sequences = array(sequences)
    X, y = sequences[:, :-1], sequences[:, -1]
    y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)
    seq_length = X.shape[1]

    return X, y, seq_length, vocab_size


def ResidualBidirectionalCuDNNLSTM(inputtokens, vocabsize, layers, units, dropout, dropout_embedding, embedding):
    """Assemble a stacked CuDNN-LSTM next-word classifier with residual links.

    Args:
        inputtokens: Length of each integer input sequence.
        vocabsize: Embedding input dimension and size of the softmax output.
        layers: Total number of recurrent layers (the bidirectional one included).
        units: Units of the bidirectional layer; later layers use ``2*units``.
        dropout: Dropout rate applied after every recurrent layer.
        dropout_embedding: Dropout rate applied right after the embedding.
        embedding: Embedding output dimension.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping token ids to a softmax over
        ``vocabsize`` classes.
    """
    # Short aliases; `kl` avoids clashing with the integer `layers` argument.
    kl = tf.keras.layers
    cudnn_lstm = tf.compat.v1.keras.layers.CuDNNLSTM

    input_ = kl.Input(shape=(inputtokens,), dtype='int32')

    # Embedding followed by its own dropout rate
    x = kl.Embedding(input_dim=vocabsize, output_dim=embedding, input_length=inputtokens)(input_)
    x = kl.Dropout(dropout_embedding)(x)

    # First recurrent layer is bidirectional; it only emits the full sequence
    # when more recurrent layers follow.
    x = kl.BatchNormalization()(x)
    more_layers_follow = layers > 1
    x = kl.Bidirectional(cudnn_lstm(units, return_sequences=more_layers_follow))(x)
    x = kl.Dropout(dropout)(x)

    # Remaining recurrent layers: every one but the last is a residual block.
    final_index = layers - 1
    for depth in range(1, layers):
        normed = kl.BatchNormalization()(x)
        if depth == final_index:
            # Last recurrent layer collapses the sequence to a single vector.
            x = cudnn_lstm(2*units)(normed)
            x = kl.Dropout(dropout)(x)
            continue
        # 2*units presumably matches the width of the bidirectional output so
        # the element-wise add is valid — TODO confirm.
        branch = cudnn_lstm(2*units, return_sequences=True)(normed)
        branch = kl.Dropout(dropout)(branch)
        x = kl.add([branch, x])

    # Output layer
    probabilities = kl.Dense(vocabsize, activation='softmax')(x)
    return tf.keras.Model(inputs=input_, outputs=probabilities)


# Build the training arrays from the corpus; each sample holds 128 input words.
X, y, sequence_length, vocabulary_size = EntirePreprocessingOfTextForLSTMModels(
    path_of_dataset='/content/drive/MyDrive/Twitter-Sentiment-Analysis/Word-Level-Text-Generation/Plato.txt',
    num_train_sequence_length=128,
)

# The model input width is taken from the preprocessing result instead of
# repeating the literal 128, so the two can never drift apart.
model = ResidualBidirectionalCuDNNLSTM(
    inputtokens=sequence_length,
    vocabsize=vocabulary_size,
    layers=9,
    units=16,
    dropout=0.15,
    dropout_embedding=0,
    embedding=32,
)



The Project Gutenberg eBook of The Republic, by Plato

This eBook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gutenberg License included with this eBook or online at
www.gutenberg.org. If you are not located in the United States, you
will have to check the laws of the country where you are located before
using this eBook.

Title: The Republic

Author: Plato

Translator: B. Jowett

Release Date: October, 1998 [eBook #1497]
[Most recently updated: September 11, 2021]

Language: English


Produced by: Sue Asscher and David Widger

*** START OF THE PROJECT GUTENBERG EBOOK THE REPUBLIC ***




THE REPUBLIC

By Plato

Translated by Benjamin Jowett

Note: See also “The Republic” by Plato, Jowett, eBook #150


Contents

 INTRODUCTION AND ANALYSIS.
 THE REPUBLIC.
 PERSONS OF THE DIALOGUE.
 BOOK I.
 BOOK II.
 BOOK III.
 BOOK

In [2]:
# Report the dimensions of the input matrix and of the one-hot target matrix.
print(f"The shape of dataset is : {X.shape}")
print(f"The shape of targets is : {y.shape}")

The shape of dataset is : (216245, 128)
The shape of targets is : (216245, 10488)


In [3]:
# Print the layer-by-layer table (output shapes, parameter counts, connections) of `model`.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 128)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 128, 32)      335616      ['input_1[0][0]']                
                                                                                                  
 dropout (Dropout)              (None, 128, 32)      0           ['embedding[0][0]']              
                                                                                                  
 batch_normalization (BatchNorm  (None, 128, 32)     128         ['dropout[0][0]']                
 alization)                                                                                   

In [None]:
import re  
import string

def clean_text(text):
    """Strip markup-like noise from ``text`` and return the cleaned string.

    Removes, in this order: square-bracketed spans, http(s)/www URLs,
    angle-bracket tags, ASCII punctuation, newline characters and any word
    that contains a digit. Case is left untouched.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # Raw strings are used for every pattern so that regex escapes such as
    # \[ \S \w \d are not interpreted as (invalid) Python string escapes.
    # text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    # NOTE: newlines are deleted, not replaced by a space, so the last word of
    # a line is joined to the first word of the next one.
    text = re.sub(r'\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

# Run the regex cleaner on `doc` and print the type of the result as a sanity check.
# NOTE(review): the earlier cells visible here only assign `doc` inside a function,
# not at module level — confirm `doc` exists on a fresh kernel before this cell runs.
doc_final = clean_text(doc)
print(type(doc_final))

In [None]:
import string

# turn a doc into clean tokens
def clean_doc(doc):
    """Tokenize ``doc`` into lower-case words made only of letters.

    Double hyphens become spaces, the text is split on whitespace, ASCII
    punctuation is deleted from each piece, and pieces that are not purely
    alphabetic afterwards are discarded.

    Args:
        doc: Raw text.

    Returns:
        List of cleaned tokens in their original order.
    """
    strip_punct = str.maketrans('', '', string.punctuation)
    cleaned = []
    for raw in doc.replace('--', ' ').split():
        word = raw.translate(strip_punct)
        # Drops numbers, empty leftovers and anything with non-letter characters.
        if word.isalpha():
            cleaned.append(word.lower())
    return cleaned


# clean document
tokens = clean_doc(doc)
print(tokens[:2000])

# Corpus size and vocabulary size after cleaning
token_total = len(tokens)
unique_total = len(set(tokens))
print('Total Tokens: %d' % token_total)
print('Unique Tokens: %d' % unique_total)

In [None]:
# organize into sequences of tokens
# Each line holds `length` consecutive tokens: 175 inputs plus 1 target word.
length = 175 + 1
sequences = [
    ' '.join(tokens[end - length:end])
    for end in range(length, len(tokens))
]
print('Total Sequences: %d' % len(sequences))



In [None]:
# save tokens to file, one dialog per line
def save_doc(lines, filename):
    """Write ``lines`` to ``filename``, joined by newline characters.

    Args:
        lines: Iterable of strings; no trailing newline is added.
        filename: Destination path; an existing file is overwritten.
    """
    data = '\n'.join(lines)
    # The context manager closes the handle even if write() raises.
    with open(filename, 'w') as file:
        file.write(data)
 
# save sequences to file
# The path is relative, so the file lands in the kernel's current working directory.
out_filename = './republic_sequences.txt'
save_doc(sequences, out_filename)



In [None]:
# load doc into memory
def load_doc(filename):
    """Read a text file and return its whole content as one string.

    Args:
        filename: Path of the file to read.

    Returns:
        The file content.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r') as file:
        return file.read()

# load
# Use the same relative path the sequences were written to; the previous
# absolute '/content/...' path only matched it when the working directory
# happened to be /content.
in_filename = './republic_sequences.txt'
doc = load_doc(in_filename)
# One training window per line
lines = doc.split('\n')



In [None]:
import tensorflow as tf
import keras

In [None]:
# integer encode sequences of words
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=None,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=' ',
)

# Learn the word -> id mapping from the lines, then encode those same lines.
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)

In [None]:
# Show how many encoded sequences `sequences` currently holds.
len(sequences)

In [None]:
# vocabulary size
# One more than the number of words the tokenizer learned; presumably the extra
# slot covers index 0, which Keras reserves for padding — TODO confirm.
vocab_size = len(tokenizer.word_index) + 1

In [None]:
# Report the vocabulary size computed from the tokenizer.
print(f"Vocabulary Size is : {vocab_size}")

In [None]:
import tensorflow.keras as keras
import tensorflow as tf


In [None]:
from numpy import array
from pickle import dump
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding


# define model: embedding -> CuDNN LSTM -> two dense blocks -> softmax over the vocabulary
model = Sequential()

model.add(Embedding(vocabulary_size, 50, input_length=sequence_length))

model.add(tf.compat.v1.keras.layers.CuDNNLSTM(175, return_sequences=False))
model.add(tf.keras.layers.BatchNormalization(axis=-1,
                                             momentum=0.99,
                                             epsilon=0.001))
model.add(tf.keras.layers.Dropout(rate=0.10))

model.add(Dense(125, activation='relu'))
model.add(tf.keras.layers.BatchNormalization(axis=-1,
                                             momentum=0.99,
                                             epsilon=0.001))
model.add(tf.keras.layers.Dropout(rate=0.20))

model.add(Dense(75, activation='relu'))
model.add(tf.keras.layers.BatchNormalization(axis=-1,
                                             momentum=0.99,
                                             epsilon=0.001))
model.add(tf.keras.layers.Dropout(rate=0.10))

model.add(Dense(vocabulary_size, activation='softmax'))

# summary() prints the table itself; wrapping it in print() only added a stray "None".
model.summary()

# compile model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Stop when the training loss has not improved for 15 epochs.
callback_1 = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                              patience=15)

# No validation data is passed to fit(), so 'val_loss' never exists; monitor the
# training loss instead. min_lr is set below the optimizer's starting rate
# (0.001 is the documented Adam default) so the rate can actually be reduced.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='loss',
                                                 factor=0.2,
                                                 patience=5,
                                                 min_lr=1e-5)

# fit model; the callbacks were previously created but never handed to fit().
history = model.fit(X, y, batch_size=256,
                    epochs=100,
                    callbacks=[callback_1, reduce_lr])



In [None]:
# save the model to file
model.save('./model-cudnnlstm.h5')

# save the tokenizer
# The already-fitted `tokenizer` is pickled as-is. Re-creating a Tokenizer here
# (as this cell used to do) replaced it with an empty, unfitted one, so the
# pickle carried no vocabulary for text generation.
# NOTE(review): make sure `tokenizer` is the one whose word ids were used to
# build the X / y the model was trained on.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    dump(tokenizer, tokenizer_file)

In [None]:
from random import randint
from pickle import load
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
import numpy as np

# load doc into memory
def load_doc(filename):
    """Return the complete text of ``filename``.

    Args:
        filename: Path of the file to read.

    Returns:
        The file content as a single string.
    """
    # `with` guarantees the handle is released even when read() fails.
    with open(filename, 'r') as file:
        return file.read()

# generate a sequence from a language model
def generate_seq(model, tokenizer, seq_length, seed_text, n_words):
    """Greedily generate ``n_words`` words that continue ``seed_text``.

    At every step the running text is integer-encoded, cut/padded to
    ``seq_length`` ids, fed to ``model.predict`` and the arg-max class is
    mapped back to a word, which is appended to the running text.

    Args:
        model: Object whose ``predict`` returns class scores for a padded batch.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences`` and ``word_index``.
        seq_length: Number of token ids fed to the model.
        seed_text: Text the generation starts from.
        n_words: Number of words to generate.

    Returns:
        The generated words joined by single spaces (the seed is not included).
    """
    # Build the id -> word table once instead of scanning word_index per step.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    result = []
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # keep only the most recent seq_length ids (older ones are cut from the front)
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # The batch has a single row; take its arg-max as a plain int.
        yhat = int(np.argmax(model.predict(encoded), axis=-1)[0])
        # Ids without a word (e.g. 0) map to the empty string, as before.
        out_word = index_to_word.get(yhat, '')
        # append to input
        in_text += ' ' + out_word
        result.append(out_word)
    return ' '.join(result)

# load cleaned text sequences
in_filename = 'republic_sequences.txt'
doc = load_doc(in_filename)
lines = doc.split('\n')
# Every line holds the input words plus one target word.
seq_length = len(lines[0].split()) - 1

# load the model from the same relative path it was saved to
model = load_model('./model-cudnnlstm.h5')

# load the tokenizer from the same relative path it was saved to
# NOTE: pickle can execute arbitrary code; only load files you created yourself.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

# select a seed text
# randint includes both bounds, so the upper bound must be the last valid index.
seed_text = lines[randint(0, len(lines) - 1)]
print(seed_text + '\n')

# generate new text
generated = generate_seq(model, tokenizer, seq_length, seed_text, 50)
print(generated)


In [None]:
def ResidualBidirectionalCuDNNLSTM(inputtokens, vocabsize, layers, units, dropout, embedding):
    """Create a residual CuDNN-LSTM stack that predicts the next word.

    This variant uses the single ``dropout`` rate everywhere, including
    directly after the embedding.

    Args:
        inputtokens: Length of each integer input sequence.
        vocabsize: Embedding input dimension and number of softmax classes.
        layers: Total number of recurrent layers (the bidirectional one included).
        units: Units of the bidirectional layer; later layers use ``2*units``.
        dropout: Dropout rate used after the embedding and every recurrent layer.
        embedding: Embedding output dimension.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    keras_layers = tf.keras.layers
    CuDNNLSTM = tf.compat.v1.keras.layers.CuDNNLSTM

    def normalized(tensor):
        # Batch-normalize a tensor before it enters a recurrent layer.
        return keras_layers.BatchNormalization()(tensor)

    def dropped(tensor):
        # Apply the shared dropout rate.
        return keras_layers.Dropout(dropout)(tensor)

    input_ = keras_layers.Input(shape=(inputtokens,), dtype='int32')

    # Embedding layer
    embedded = keras_layers.Embedding(
        input_dim=vocabsize, output_dim=embedding, input_length=inputtokens
    )(input_)
    net = dropped(embedded)

    # Bidirectional LSTM layer; keeps the time axis only if more layers follow
    net = normalized(net)
    net = keras_layers.Bidirectional(CuDNNLSTM(units, return_sequences=(layers > 1)))(net)
    net = dropped(net)

    # Rest of LSTM layers with residual connections (if any)
    for i in range(1, layers):
        is_last = not (i < layers - 1)
        if is_last:
            # Final recurrent layer returns only its last output.
            net = normalized(net)
            net = CuDNNLSTM(2*units)(net)
            net = dropped(net)
        else:
            block = normalized(net)
            block = CuDNNLSTM(2*units, return_sequences=True)(block)
            block = dropped(block)
            net = keras_layers.add([block, net])

    # Output layer
    outputs = keras_layers.Dense(vocabsize, activation='softmax')(net)
    model = tf.keras.Model(inputs=input_, outputs=outputs)

    return model

# Single bidirectional layer, no dropout. The input width follows the training
# data (`sequence_length`) rather than a literal 50, which did not match the
# 128-column X that the next cells fit this model on.
model = ResidualBidirectionalCuDNNLSTM(
    inputtokens=sequence_length,
    vocabsize=vocabulary_size,
    layers=1,
    units=64,
    dropout=0,
    embedding=128,
)

In [4]:
# Print the architecture table of the model currently bound to `model`.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 128)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 128, 32)      335616      ['input_1[0][0]']                
                                                                                                  
 dropout (Dropout)              (None, 128, 32)      0           ['embedding[0][0]']              
                                                                                                  
 batch_normalization (BatchNorm  (None, 128, 32)     128         ['dropout[0][0]']                
 alization)                                                                                   

In [4]:

# compile model
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=['accuracy'])

# Stop when the training loss has not improved for 15 epochs.
callback_1 = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                              patience=15)

# Cut the learning rate to 20% after 5 epochs without improvement.
# min_lr is the lower bound of the rate; it used to equal the starting rate
# (0.001), which made this callback unable to ever lower it.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='loss',
                                                 factor=0.2,
                                                 patience=5,
                                                 min_lr=1e-5)

# fit model
history = model.fit(
    X, y,
    batch_size=256,
    epochs=100,
    callbacks=[callback_1, reduce_lr],
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

#### The actual text generation code with TensorFlow

In [7]:
from numpy import array
from pickle import dump
from keras.preprocessing.text import Tokenizer
# NOTE(review): this Tokenizer is created here and pickled below without any
# fit_on_texts call in between, so the tokenizer.pkl written by this cell holds
# no vocabulary from this cell. Confirm whether the fitted tokenizer used to
# encode the training data should be saved instead.
tokenizer = Tokenizer()

# save the model to file
model.save('model.h5')
# save the tokenizer
# NOTE(review): the file object passed to dump() is never closed explicitly.
dump(tokenizer, open('tokenizer.pkl', 'wb'))

In [None]:
import string

# load doc into memory
def load_doc(filename):
    """Load a text file into memory.

    Args:
        filename: Path of the file to read.

    Returns:
        The entire file content as one string.
    """
    # Closing is handled by the context manager, including on read errors.
    with open(filename, 'r') as file:
        text = file.read()
    return text

# turn a doc into clean tokens
def clean_doc(doc):
    """Convert raw text into a list of lower-case alphabetic tokens.

    ``--`` is treated as a word separator, ASCII punctuation is removed from
    every whitespace-delimited piece, and pieces that are not entirely letters
    after that are dropped.

    Args:
        doc: Raw text.

    Returns:
        The surviving tokens, lower-cased, in document order.
    """
    table = str.maketrans('', '', string.punctuation)
    # Lazily strip punctuation from each piece of the '--'-normalized text.
    stripped = (piece.translate(table) for piece in doc.replace('--', ' ').split())
    # Keep purely alphabetic pieces and lower-case them.
    return [word.lower() for word in stripped if word.isalpha()]

# save tokens to file, one dialog per line
def save_doc(lines, filename):
    """Save ``lines`` to ``filename``, one entry per line.

    Args:
        lines: Iterable of strings, written separated by ``'\\n'`` with no
            trailing newline.
        filename: Destination path; an existing file is overwritten.
    """
    # The context manager flushes and closes the file even if write() raises.
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))

# load document
in_filename ='/content/drive/MyDrive/Twitter-Sentiment-Analysis/Word-Level-Text-Generation/Plato.txt'
doc = load_doc(in_filename)
print(doc[:200])

# clean document and report corpus / vocabulary sizes
tokens = clean_doc(doc)
print(tokens[:200])
token_total = len(tokens)
unique_total = len(set(tokens))
print('Total Tokens: %d' % token_total)
print('Unique Tokens: %d' % unique_total)

# organize into sequences of tokens: 50 input words plus 1 target word per line
length = 50 + 1
sequences = [
    ' '.join(tokens[end - length:end])
    for end in range(length, len(tokens))
]
print('Total Sequences: %d' % len(sequences))

# save sequences to file
out_filename = 'republic_sequences.txt'
save_doc(sequences, out_filename)

The Project Gutenberg eBook of The Republic, by Plato

This eBook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
wh
['the', 'project', 'gutenberg', 'ebook', 'of', 'the', 'republic', 'by', 'plato', 'this', 'ebook', 'is', 'for', 'the', 'use', 'of', 'anyone', 'anywhere', 'in', 'the', 'united', 'states', 'and', 'most', 'other', 'parts', 'of', 'the', 'world', 'at', 'no', 'cost', 'and', 'with', 'almost', 'no', 'restrictions', 'whatsoever', 'you', 'may', 'copy', 'it', 'give', 'it', 'away', 'or', 'reuse', 'it', 'under', 'the', 'terms', 'of', 'the', 'project', 'gutenberg', 'license', 'included', 'with', 'this', 'ebook', 'or', 'online', 'at', 'wwwgutenbergorg', 'if', 'you', 'are', 'not', 'located', 'in', 'the', 'united', 'states', 'you', 'will', 'have', 'to', 'check', 'the', 'laws', 'of', 'the', 'country', 'where', 'you', 'are', 'located', 'before', 'using', 'this', 'ebook', 'title', 'the', 'republic', 'author',

In [None]:
from numpy import array
from pickle import dump
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

# load doc into memory
def load_doc(filename):
    """Read ``filename`` and return everything in it as a string.

    Args:
        filename: Path of the file to read.

    Returns:
        The file content.
    """
    # Using `with` ensures the file is closed even if reading fails.
    with open(filename, 'r') as file:
        return file.read()

# load the saved windows, one per line
in_filename = 'republic_sequences.txt'
doc = load_doc(in_filename)
lines = doc.split('\n')

# integer encode sequences of words
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)
# vocabulary size
vocab_size = len(tokenizer.word_index) + 1

# separate into input (all but the last column) and output (last column, one-hot)
sequences = array(sequences)
X, y = sequences[:,:-1], sequences[:,-1]
y = to_categorical(y, num_classes=vocab_size)
seq_length = X.shape[1]

# define model
model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=seq_length))
model.add(LSTM(100, return_sequences=True))
model.add(LSTM(100))
model.add(Dense(100, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself; print(model.summary()) also printed "None".
model.summary()
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model
model.fit(X, y, batch_size=128, epochs=100)

# save the model to file
model.save('model.h5')
# save the tokenizer; the context manager flushes and closes the pickle file
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    dump(tokenizer, tokenizer_file)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 50, 50)            524400    
                                                                 
 lstm (LSTM)                 (None, 50, 100)           60400     
                                                                 
 lstm_1 (LSTM)               (None, 100)               80400     
                                                                 
 dense (Dense)               (None, 100)               10100     
                                                                 
 dense_1 (Dense)             (None, 10488)             1059288   
                                                                 
Total params: 1,734,588
Trainable params: 1,734,588
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch

In [None]:
from random import randint
from pickle import load
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
import numpy as np

def AbstractiveLSTMTextGenerator(input_seed_text, max_length, text_file_cleaned, model_path, tokenizer_path):
    """Continue ``input_seed_text`` with words predicted by a saved model.

    Args:
        input_seed_text: Text the generation starts from.
        max_length: Number of words to generate.
        text_file_cleaned: Path of the saved training sequences; its first line
            determines the model input length (word count minus one).
        model_path: Path passed to ``load_model``.
        tokenizer_path: Path of the pickled tokenizer.

    Returns:
        The seed text followed by a space and the generated words.

    Side effects:
        Prints the generated continuation and the full text.
    """

    # generate a sequence from a language model
    def generate_seq(model, tokenizer, seq_length, seed_text, n_words):
        """Greedily predict ``n_words`` words, feeding each one back as input."""
        # Build the id -> word table once instead of scanning word_index per step.
        index_to_word = {index: word for word, index in tokenizer.word_index.items()}
        result = []
        in_text = seed_text
        # generate a fixed number of words
        for _ in range(n_words):
            # encode the text as integer
            encoded = tokenizer.texts_to_sequences([in_text])[0]
            # keep only the most recent seq_length ids (older ones are cut from the front)
            encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
            # The batch has a single row; take its arg-max as a plain int.
            yhat = int(np.argmax(model.predict(encoded), axis=-1)[0])
            # Ids without a word (e.g. 0) map to the empty string, as before.
            out_word = index_to_word.get(yhat, '')
            # append to input
            in_text += ' ' + out_word
            result.append(out_word)
        return ' '.join(result)

    # Only the first line is needed to derive the input length, so the
    # (potentially very large) sequences file is not read in full.
    with open(text_file_cleaned, 'r') as sequences_file:
        first_line = sequences_file.readline().rstrip('\n')
    seq_length = len(first_line.split()) - 1

    # load the model
    model = load_model(model_path)

    # load the tokenizer
    # NOTE: pickle can execute arbitrary code; only load files you created yourself.
    with open(tokenizer_path, 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    seed_text = input_seed_text

    # generate new text
    generated = generate_seq(model, tokenizer, seq_length, seed_text, max_length)

    print(f"The AI generated text is : {generated}")

    text_final = seed_text + " " + generated
    print("\n")
    print("The entire text is : {}".format(text_final))

    return text_final



In [None]:

# Seed sentence the model continues from
SEED = 'The man was constantly shooting with his gun'

# Generate 175 words using the model and tokenizer saved by the training cell.
generated_text = AbstractiveLSTMTextGenerator(
    input_seed_text=SEED,
    max_length=175,
    text_file_cleaned='republic_sequences.txt',
    model_path='model.h5',
    tokenizer_path='tokenizer.pkl',
)



The AI generated text is : own age and the companion of the world and the remainder of the human race in the clouds and the other absorbed for the practicability of the soul is constructed and sung by degrees the scattered institution of inconceivable technical diseases in the same part posted with the analysis of the pretence to the report of the earlier faculties in the republic the second and oblong cause milton in the republic the desirableness of mind is the rim circle pages by side arithmetical or denounced the truth in colour eg debt and andromache and the rest of the theory of human infant the philosopher is sufficiently evident the true helmsmen to them in conformity to falsehood the world of lectures and the most miserable of days is the entire christians is apt to blink at the affairs of the sexes and of xenophon and at first sight is only a logical difficulty in accordance with the analysis of the human race the illustrations of the republic is developed out of the uncertai

In [None]:
# Display the value returned by the generator call (seed text followed by the generated words).
generated_text

'The man was constantly shooting with his gun own age and the companion of the world and the remainder of the human race in the clouds and the other absorbed for the practicability of the soul is constructed and sung by degrees the scattered institution of inconceivable technical diseases in the same part posted with the analysis of the pretence to the report of the earlier faculties in the republic the second and oblong cause milton in the republic the desirableness of mind is the rim circle pages by side arithmetical or denounced the truth in colour eg debt and andromache and the rest of the theory of human infant the philosopher is sufficiently evident the true helmsmen to them in conformity to falsehood the world of lectures and the most miserable of days is the entire christians is apt to blink at the affairs of the sexes and of xenophon and at first sight is only a logical difficulty in accordance with the analysis of the human race the illustrations of the republic is developed 