 # Imports

In [2]:
import gensim
import nltk
import keras
import numpy as np
from keras.layers import LSTM, Dense, Activation
from keras.optimizers import SGD
from keras.activations import softmax
from keras.models import Sequential
from gensim import corpora, models, similarities
from nltk import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

Using TensorFlow backend.


# Loading the data

In [3]:
data = open('data/Alice in Wonderland.txt').read()

# Sentence tokenizer and deleting extra information from text

In [4]:
sentences = [sent for sent in sent_tokenize(data)]

# Word tokenizeing

In [5]:
words = [nltk.word_tokenize(sentence) for sentence in sentences]

# Creating a word2vec gensim model

In [6]:
word_model = gensim.models.Word2Vec(words, min_count = 1, size = 50)

# Getting word2vec representation of all the words

In [7]:
all_words = []
word_to_vec = []
for each_sentence in words:
    for each_word in each_sentence:
        all_words.append(each_word)
for each_word in all_words:
    word_to_vec.append(word_model[each_word])

# Making sequences of 100 

In [8]:
input_data = []
output_data = []
for index in range(len(word_to_vec) - 101):
    sequence = word_to_vec[index : index + 100]
    input_data.append(sequence)
    y = word_to_vec[index + 101]
    output_data.append(y)

# Splitting the data in train/test (80-20)

In [9]:
splitting_length = int(len(input_data) * .80)
x_train = np.array(input_data[:splitting_length])
x_test = np.array(input_data[splitting_length:])
y_train = np.array(output_data[:splitting_length])
y_test = np.array(output_data[splitting_length:])

# Creating the model 

In [10]:
model = Sequential()
model.add(LSTM(128, input_shape = x_train[0].shape))
model.add(Dense(50))   
model.add(Activation('softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics = ['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               91648     
_________________________________________________________________
dense_1 (Dense)              (None, 50)                6450      
_________________________________________________________________
activation_1 (Activation)    (None, 50)                0         
Total params: 98,098
Trainable params: 98,098
Non-trainable params: 0
_________________________________________________________________


# Fitting the model

In [11]:
model.fit(x_train, y_train, verbose = 2, batch_size = 128, epochs = 2 ,validation_data = (x_test, y_test))

Train on 28289 samples, validate on 7073 samples
Epoch 1/2
517s - loss: -1.1606e+02 - acc: 0.9112 - val_loss: -1.2063e+02 - val_acc: 0.9837
Epoch 2/2
167s - loss: -1.2834e+02 - acc: 0.9773 - val_loss: -1.2063e+02 - val_acc: 0.9837


<keras.callbacks.History at 0x7f6a90bd9b00>

# Evaluating model accuracy

In [12]:
model.evaluate(x_test,y_test)



[-120.63037976189851, 0.98374098685140676]

# Prediction based on the above model

In [13]:
seed = x_train[0]
sent = ' '.join([word_model.similar_by_vector(word)[0][0] for word in seed])
seed1 = x_train[100][0:20]
sent1 = ' '.join([word_model.similar_by_vector(word)[0][0] for word in seed])
sent2 = ' '.join([word_model.similar_by_vector(word)[0][0] for word in seed1])
original_sent = sent1 + ' ' + sent2
print('\n\n Input Sentence\n\n: ',sent1)
print('\n\n Actual Sentence\n\n: ',original_sent)
for i in range(20):

    pred_word = model.predict(seed.reshape(1,100,50))
    seed = np.roll(seed, -1)
    seed[-1] = pred_word
    new_word = word_model.similar_by_vector(pred_word[0])[0][0]
    sent += ' ' + new_word
    print(sent)
print('\n\n Predicted sentence\n\n:',sent)



 Input Sentence

:  CHAPTER I . Down the Rabbit-Hole Alice was beginning to get very tired of sitting by her sister on the bank , and of having nothing to do : once or twice she had peeped into the book her sister was reading , but it had no pictures or conversations in it , ‘and what is the use of a book , ’ thought Alice ‘without pictures or conversations ? ’ So she was considering in her own mind ( as well as she could , for the hot day made her feel very sleepy and stupid ) , whether


 Actual Sentence

:  CHAPTER I . Down the Rabbit-Hole Alice was beginning to get very tired of sitting by her sister on the bank , and of having nothing to do : once or twice she had peeped into the book her sister was reading , but it had no pictures or conversations in it , ‘and what is the use of a book , ’ thought Alice ‘without pictures or conversations ? ’ So she was considering in her own mind ( as well as she could , for the hot day made her feel very sleepy and stupid ) , whether the pleas