In [1]:
import pandas as pd
import numpy as np
import string

from keras.models import model_from_json
from keras.preprocessing.text import Tokenizer
import keras.utils as ku
from keras.utils import pad_sequences

In [2]:
# Load the quotes and drop every row whose text contains a digit.
# The pattern is a raw string so "\d" reaches the regex engine as written instead
# of relying on Python passing an unrecognised string escape through (which
# triggers a warning on recent Python versions).
# na=True treats rows without a usable quote as "matched", so they are dropped
# here exactly as the previous `== False` comparison dropped them.
df = pd.read_csv('eng_quotes.csv')
df = df[~df["quote"].str.contains(r'\d', na=True)]

# Plain Python list of the surviving quotes, in row order.
quotes_list = df['quote'].tolist()

In [3]:
# Strip punctuation
def remove_punctuation(text):
    """Return ``text`` with every ``string.punctuation`` character removed, except the apostrophe.

    The apostrophe is deliberately kept, so contractions such as "it's" survive.
    """
    removable = "".join(ch for ch in string.punctuation if ch != "'")
    deletion_table = str.maketrans("", "", removable)
    return text.translate(deletion_table)

# Lowercase
def text_lower(text):
    """Return a lowercased copy of ``text``."""
    return text.lower()

# Clean every quote: strip punctuation first, then lowercase the result.
cleaned_quotes = [
    text_lower(str(remove_punctuation(quote)))
    for quote in quotes_list
]

In [4]:
# Tokenization: one module-level Tokenizer shared by generate_sequences (which
# fits it on the corpus) and generate_quote (which uses it to encode the seed
# text and to map predicted indices back to words).
tokenizer = Tokenizer()

# Build the n-gram sequences
def generate_sequences(corpus):
    """Fit the module-level ``tokenizer`` on ``corpus`` and expand each line into n-gram prefixes.

    A line encoded as ``[t0, t1, ..., tk]`` contributes ``[t0, t1]``,
    ``[t0, t1, t2]``, ... up to the full line. Lines that encode to fewer than
    two tokens contribute nothing.

    Side effects: fits ``tokenizer`` and prints the value of ``total_words``.

    Returns:
        (input_sequences, total_words): the list of prefix sequences and
        ``len(tokenizer.word_index) + 1``.
    """
    tokenizer.fit_on_texts(corpus)
    total_words = len(tokenizer.word_index) + 1
    print(f"Total unique words in the text corpus: {total_words}")

    # Encode one line at a time, then emit every prefix of length >= 2.
    encoded_lines = (tokenizer.texts_to_sequences([line])[0] for line in corpus)
    input_sequences = [
        encoded[:end]
        for encoded in encoded_lines
        for end in range(2, len(encoded) + 1)
    ]
    return input_sequences, total_words

# Generating sequences (this call also fits the module-level tokenizer)
input_sequences, total_words = generate_sequences(cleaned_quotes)
# Preview the first five n-gram sequences
input_sequences[:5]

Total unique words in the text corpus: 7689


[[12, 59],
 [12, 59, 215],
 [12, 59, 215, 167],
 [12, 59, 215, 167, 5],
 [12, 59, 215, 167, 5, 509]]

In [5]:
# Split the padded sequences into predictors and one-hot labels
def generate_input_sequence(input_sequences):
    """Pad ``input_sequences`` to a common length and split them into predictors and labels.

    Every sequence is padded (``pad_sequences`` defaults) to the length of the
    longest one. The last column of the padded array becomes the label and is
    one-hot encoded with ``num_classes`` taken from the module-level
    ``total_words``; all preceding columns are the predictors.

    Returns:
        (predictors, label, maxlen): the predictor columns, the one-hot labels,
        and the padded length (which still includes the label column).
    """
    longest = max(len(seq) for seq in input_sequences)
    padded = pad_sequences(input_sequences, maxlen=longest)

    features = padded[:, :-1]
    targets = ku.to_categorical(padded[:, -1], num_classes=total_words)
    return features, targets, longest

# maxlen is the padded sequence width including the label column, so the
# predictors are maxlen - 1 tokens wide.
predictors, label, maxlen = generate_input_sequence(input_sequences)

In [6]:
# Rebuild the model from its saved JSON architecture, then load its weights.
# The context manager guarantees the file handle is closed even if reading raises.
with open('eng_quote_model.json', 'r') as file:
    loaded = file.read()

model = model_from_json(loaded)
model.load_weights("eng_quote_model.h5")

In [7]:
# Text generating function
def generate_quote(seed_text, num_words, model, maxlen):
    """Extend ``seed_text`` with up to ``num_words`` words predicted by ``model``.

    Each step encodes the text generated so far with the module-level
    ``tokenizer``, pre-pads it to ``maxlen`` tokens, and appends the word whose
    index has the highest score in ``model.predict``'s output.

    Args:
        seed_text: Starting text; it is kept as the prefix of the result.
        num_words: Maximum number of words to append.
        model: Object whose ``predict`` accepts the padded token array and
            returns per-index scores.
        maxlen: Length the encoded text is padded (or truncated) to before
            prediction.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early if the predicted index has no word in
        the tokenizer's vocabulary, so the result never gains trailing spaces.
    """
    for _ in range(num_words):
        tokens = tokenizer.texts_to_sequences([seed_text])[0]
        tokens = pad_sequences([tokens], maxlen=maxlen, padding='pre')

        predicted = int(np.argmax(model.predict(tokens)))

        # index_word is the tokenizer's index -> word mapping (the inverse of
        # word_index), so one dict lookup replaces scanning the whole vocabulary.
        output_word = tokenizer.index_word.get(predicted, '')
        if not output_word:
            # No word for this index (e.g. the padding index 0). Appending an
            # empty word would leave the encoded input unchanged for the next
            # step, so further iterations would only add spaces: stop here.
            break

        seed_text = seed_text + " " + output_word

    return seed_text

In [8]:
# Seeds are padded to maxlen - 1 tokens, the width of the predictors built by generate_input_sequence.
print(generate_quote("kill", num_words=4, model=model, maxlen=maxlen - 1))

kill your life and mine


In [14]:
print(generate_quote("suicude", num_words=5, model=model, maxlen=maxlen - 1))

suicude is the handmaiden of creativity


In [21]:
print(generate_quote("happy", num_words=3, model=model, maxlen=maxlen - 1))

happy does not exist


In [12]:
print(generate_quote("passion", num_words=7, model=model, maxlen=maxlen - 1))

passion is a pesky part of being human


In [37]:
print(generate_quote("love", num_words=5, model=model, maxlen=maxlen - 1))

love is the key to life


In [14]:
print(generate_quote("Beauty", num_words=13, model=model, maxlen=maxlen - 1))

Beauty is not the absence of problems it's the ability to deal with them
