# Libraries

In [1]:
import numpy as np
import pandas as pd

In [2]:
from nltk.tokenize import word_tokenize
from keras.preprocessing.text import Tokenizer

Using TensorFlow backend.


In [3]:
from keras.utils import to_categorical

In [4]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [5]:
import pickle

In [6]:
# Show the full quote text instead of truncating long cells.
# Newer pandas releases deprecate the legacy -1 "no limit" sentinel in favour
# of None, while older releases only accept integers, so try None first and
# fall back to -1 when the installed pandas rejects it.
try:
    pd.options.display.max_colwidth = None
except ValueError:
    pd.options.display.max_colwidth = -1

# Data

In [7]:
# Load the scraped Goodreads quotes; later cells read the 'Quote' column.
# NOTE(review): absolute, machine-specific path — point this at a configurable
# data directory before sharing the notebook.
data = pd.read_csv('E:/Scrapped-Data/Quotes-Goodreads/Popular.csv')

In [8]:
# Display the raw frame (pandas elides the middle rows in the rendered output).
data

Unnamed: 0,Quote
0,"“Don't cry because it's over, smile because it happened.” ― Dr. Seuss"
1,"“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.” ― Marilyn Monroe"
2,“Be yourself; everyone else is already taken.” ― Oscar Wilde
3,“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.” ― Albert Einstein
4,"“So many books, so little time.” ― Frank Zappa"
...,...
2995,“What can you do to promote world peace? Go home and love your family.” ― Mother Teresa
2996,"“. . . when it comes down to it, that’s what life is all about: showing up for the people you love, again and again, until you can’t show up anymore.” ― Rebecca Walker, Baby Love: Choosing Motherhood After a Lifetime of Ambivalence //"
2997,"“Just tell me how to be different in a way that makes sense.” ― Stephen Chbosky, The Perks of Being a Wallflower //"
2998,"“sometimes you don't need a goal in life, you don't need to know the big picture. you just need to know what you're going to do next!” ― Sophie Kinsella, The Undomestic Goddess //"


# Cleaning

In [9]:
def clean(x):
    """Extract the quote body from a raw 'quote ― attribution' string.

    Everything from the first '―' onwards (author, book title, trailing
    markers) is discarded. Surrounding whitespace is stripped, and the
    opening/closing curly quotation marks are removed only when they are
    actually present, so text is never lost when the spacing or quoting
    differs from the usual layout. A literal ' endquote' token is appended
    to mark the end of the quote.

    Parameters
    ----------
    x : str
        Raw quote text, e.g. '“Be yourself.” ― Oscar Wilde'.

    Returns
    -------
    str
        The bare quote followed by ' endquote'.
    """
    # Keep only the part before the attribution separator.
    text = x.split('―')[0].strip()
    # Drop one enclosing curly quote on each side, if present, rather than
    # slicing a fixed number of characters.
    if text.startswith('“'):
        text = text[1:]
    if text.endswith('”'):
        text = text[:-1]
    return text + ' endquote'

In [10]:
# Replace every raw quote with its cleaned text.
# NOTE: this overwrites the column in place, so re-running the cell feeds
# already-cleaned text through clean() a second time.
data['Quote'] = data['Quote'].map(clean)

In [11]:
# Inspect the cleaned quotes.
data

Unnamed: 0,Quote
0,"Don't cry because it's over, smile because it happened. endquote"
1,"I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best. endquote"
2,Be yourself; everyone else is already taken. endquote
3,Two things are infinite: the universe and human stupidity; and I'm not sure about the universe. endquote
4,"So many books, so little time. endquote"
...,...
2995,What can you do to promote world peace? Go home and love your family. endquote
2996,". . . when it comes down to it, that’s what life is all about: showing up for the people you love, again and again, until you can’t show up anymore. endquote"
2997,Just tell me how to be different in a way that makes sense. endquote
2998,"sometimes you don't need a goal in life, you don't need to know the big picture. you just need to know what you're going to do next! endquote"


# Preprocessing

In [12]:
# Join all cleaned quotes into one newline-separated string and split it into
# word-level tokens with NLTK.
tokens = word_tokenize('\n'.join(data['Quote']))

In [13]:
# Each training example is a window of 4 context tokens plus the token that follows them.
train_len = 4+1

# Slide a train_len-wide window over the token stream, advancing one token at a time.
text_sequences = [
    tokens[start:start + train_len]
    for start in range(len(tokens) - train_len + 1)
]

In [14]:
# Build the word -> integer index from the token windows.
# NOTE(review): each window is already a list of tokens rather than a raw
# string — presumably Keras treats list inputs as pre-tokenized; confirm
# against the Tokenizer documentation.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_sequences)

In [15]:
# Vocabulary size is the number of indexed words plus one.
# NOTE(review): the +1 presumably accounts for index 0, which the Keras
# Tokenizer does not assign to any word — confirm.
vocabulary_size = len(tokenizer.index_word)+1
print(vocabulary_size)

8539


In [16]:
# Encode every token window as integer ids and stack the result into a single array.
sequences = np.array(tokenizer.texts_to_sequences(text_sequences))

In [17]:
# Number of context tokens fed to the model (a window minus its final token).
sequence_len = train_len-1

# Inputs: every id in a window except the last one.
X = sequences[:, :-1]
# Targets: the last id of each window, one-hot encoded over the whole vocabulary.
y = to_categorical(sequences[:, -1], num_classes=vocabulary_size)

# Training

In [21]:
# Stop training once the monitored loss fails to improve (no patience is set).
# NOTE(review): both callbacks watch the training 'loss' even though fit() is
# given a validation split — confirm that 'val_loss' was not intended.
es = EarlyStopping(monitor='loss', mode='min', verbose=1)
# Checkpoint file; only the model with the lowest monitored loss so far is kept.
filepath = "model.h5"
ckpt = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

def build_network(vocab_size=None, seq_len=None, embedding_dim=128,
                  lstm_units=1024, hidden_units=32):
    """Build and compile the next-word prediction network.

    Architecture: Embedding -> LSTM -> Dense(relu) -> Dense(softmax over the
    vocabulary), compiled with categorical cross-entropy and the Adam
    optimizer. The layer summary is printed before the model is returned.

    Parameters
    ----------
    vocab_size : int, optional
        Size of the embedding input and of the softmax output. Defaults to
        the notebook-level ``vocabulary_size``.
    seq_len : int, optional
        Number of context tokens per input sample. Defaults to the
        notebook-level ``sequence_len``.
    embedding_dim : int, optional
        Width of the embedding vectors (default 128).
    lstm_units : int, optional
        Number of LSTM units (default 1024).
    hidden_units : int, optional
        Width of the intermediate dense layer (default 32).

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    # Fall back to the notebook globals so the original no-argument call
    # keeps working unchanged.
    if vocab_size is None:
        vocab_size = vocabulary_size
    if seq_len is None:
        seq_len = sequence_len

    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=seq_len))
    model.add(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(hidden_units, activation='relu'))
    # One output probability per vocabulary index.
    model.add(Dense(vocab_size, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model

In [22]:
# Build and compile the network; build_network() also prints the layer summary.
model = build_network()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 4, 128)            1092992   
_________________________________________________________________
lstm_2 (LSTM)                (None, 1024)              4722688   
_________________________________________________________________
dense_3 (Dense)              (None, 32)                32800     
_________________________________________________________________
dense_4 (Dense)              (None, 8539)              281787    
Total params: 6,130,267
Trainable params: 6,130,267
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Train for up to 500 epochs with 30% of the samples held out for validation;
# the early-stopping and checkpoint callbacks are attached to the run.
model.fit(X, y, validation_split=0.3, epochs=500, callbacks=[es, ckpt])

Instructions for updating:
Use tf.cast instead.
Train on 76888 samples, validate on 32953 samples
Epoch 1/500

Epoch 00001: loss improved from inf to 6.10318, saving model to model.h5
Epoch 2/500

Epoch 00002: loss improved from 6.10318 to 5.46849, saving model to model.h5
Epoch 3/500

Epoch 00003: loss improved from 5.46849 to 5.09949, saving model to model.h5
Epoch 4/500

Epoch 00004: loss improved from 5.09949 to 4.86418, saving model to model.h5
Epoch 5/500

Epoch 00005: loss improved from 4.86418 to 4.67969, saving model to model.h5
Epoch 6/500

Epoch 00006: loss improved from 4.67969 to 4.51927, saving model to model.h5
Epoch 7/500

Epoch 00007: loss improved from 4.51927 to 4.36715, saving model to model.h5
Epoch 8/500

Epoch 00008: loss improved from 4.36715 to 4.21545, saving model to model.h5
Epoch 9/500

Epoch 00009: loss improved from 4.21545 to 4.07267, saving model to model.h5
Epoch 10/500

Epoch 00010: loss improved from 4.07267 to 3.93838, saving model to model.h5
Epoch

  % delta_t_median)



Epoch 00017: loss improved from 3.26325 to 3.17493, saving model to model.h5
Epoch 18/500

Epoch 00018: loss improved from 3.17493 to 3.09506, saving model to model.h5
Epoch 19/500

Epoch 00019: loss improved from 3.09506 to 3.02070, saving model to model.h5
Epoch 20/500

Epoch 00020: loss improved from 3.02070 to 2.94656, saving model to model.h5
Epoch 21/500

Epoch 00021: loss improved from 2.94656 to 2.88735, saving model to model.h5
Epoch 22/500

Epoch 00022: loss improved from 2.88735 to 2.82075, saving model to model.h5
Epoch 23/500

Epoch 00023: loss improved from 2.82075 to 2.76651, saving model to model.h5
Epoch 24/500

Epoch 00024: loss improved from 2.76651 to 2.71266, saving model to model.h5
Epoch 25/500

Epoch 00025: loss improved from 2.71266 to 2.65540, saving model to model.h5
Epoch 26/500

Epoch 00026: loss improved from 2.65540 to 2.61008, saving model to model.h5
Epoch 27/500

Epoch 00027: loss improved from 2.61008 to 2.56691, saving model to model.h5
Epoch 28/500


Epoch 00054: loss improved from 2.00867 to 2.00522, saving model to model.h5
Epoch 55/500

Epoch 00055: loss improved from 2.00522 to 1.98872, saving model to model.h5
Epoch 56/500

Epoch 00056: loss improved from 1.98872 to 1.97609, saving model to model.h5
Epoch 57/500

Epoch 00057: loss improved from 1.97609 to 1.95781, saving model to model.h5
Epoch 58/500

Epoch 00058: loss did not improve from 1.95781
Epoch 00058: early stopping


<keras.callbacks.History at 0x18af2cc26d8>

# Reload the model stored in "model.h5".
# NOTE(review): this import sits apart from the other keras imports at the top
# of the notebook.
from keras.models import load_model
new_model = load_model("model.h5")