# 03 ADVANCED LEVEL TASK

### 2) Next Word Prediction: Using the TensorFlow and Keras libraries, train an RNN to predict the next word

## Abhishek_Sutar


### import Libraries

In [None]:
import numpy as np
from nltk.tokenize import RegexpTokenizer
from keras.models import Sequential, load_model
from keras.layers import LSTM
from keras.layers.core import Dense, Activation
from keras.optimizers import RMSprop
import matplotlib.pyplot as plt
import pickle
import heapq

### load the data

In [None]:

# Read the whole corpus and lower-case it so that the vocabulary built from it
# is case-insensitive. The context manager closes the file handle as soon as
# the text has been read instead of leaving it open for the rest of the session.
with open('dataset.txt', encoding='utf8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

### Split the dataset into individual words

In [None]:
# Tokens are the matches of `\w+` (runs of word characters) in the corpus text.
tknz = RegexpTokenizer(pattern=r'\w+')
# Flat list of every token, in corpus order.
words = tknz.tokenize(text)

In [None]:
# Notebook display cell: show the list of tokens extracted from the corpus.
words

In [None]:
# Sorted array of the distinct tokens; its length is the vocabulary size.
unique_words = np.unique(words)
# Reverse lookup: word -> its position in `unique_words`.
unique_word_index = {word: position for position, word in enumerate(unique_words)}

In [None]:
# Notebook display cell: show the word -> vocabulary-index mapping.
unique_word_index

In [None]:
# Notebook display cell: show the array of distinct words (the vocabulary).
unique_words

### Feature Engineering

In [None]:
# Number of consecutive context words that make up one training sample.
w_len = 8
# Slide a window over the corpus: every run of `w_len` words becomes a context...
prev_w = [words[start:start + w_len] for start in range(len(words) - w_len)]
# ...and the word immediately after that run is its prediction target, so the
# targets are simply the corpus with its first `w_len` words dropped.
next_w = list(words[w_len:])
# Sanity check: first context window and the word that follows it.
print(prev_w[0])
print(next_w[0])

In [None]:
# Boolean one-hot tensors, all False until the encoding loop fills them in:
#   X is indexed by (sample, position in the context window, vocabulary word);
#   Y is indexed by (sample, vocabulary word).
X = np.zeros(
    shape=(len(prev_w), w_len, len(unique_words)),
    dtype=bool,
)
Y = np.zeros(
    shape=(len(next_w), len(unique_words)),
    dtype=bool,
)

In [None]:
# Notebook display cell: show the (still all-False) input tensor.
X

In [None]:
# Notebook display cell: show the (still all-False) target tensor.
Y

In [None]:
# One-hot encode the data: for every sample, switch on the vocabulary slot of
# each context word in X and the slot of the target word in Y.
for sample_idx, (context, target) in enumerate(zip(prev_w, next_w)):
    for position, word in enumerate(context):
        X[sample_idx, position, unique_word_index[word]] = True
    Y[sample_idx, unique_word_index[target]] = True
# Sanity check: encoding of the first word of the second sample.
print(X[1, 0])

### Recurrent neural network for the next-word prediction model: an LSTM, which is a very powerful kind of RNN

In [None]:
# A single 128-unit LSTM reads the one-hot context window; a dense layer with
# one unit per vocabulary word followed by softmax turns its output into a
# probability distribution over the next word.
model = Sequential([
    LSTM(128, input_shape=(w_len, len(unique_words))),
    Dense(len(unique_words)),
    Activation('softmax'),
])

### Training the Next Word Prediction Model

In [None]:
# `learning_rate` is the supported keyword for the step size; the older `lr`
# alias is deprecated and is rejected by recent Keras releases.
optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# Train for 7 epochs, holding out 5% of the samples for validation. Only the
# per-epoch metrics dict is kept (plain Python data, so it can be pickled).
history = model.fit(X, Y, validation_split=0.05, batch_size=128, epochs=7, shuffle=True).history

In [None]:
# Persist the trained model and its training metrics, then load both back so
# the remaining cells work from the saved artefacts.
model.save('model_wp.h5')
with open("history.p", "wb") as history_file:
    pickle.dump(history, history_file)
model = load_model('model_wp.h5')
# NOTE: pickle.load executes arbitrary code from the file; this is only safe
# because history.p was written by the lines just above.
with open("history.p", "rb") as history_file:
    history = pickle.load(history_file)

In [None]:
# Accuracy per epoch: first curve is the training set, second the held-out split.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history[metric])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='upper right')



In [None]:

# Loss per epoch: first curve is the training set, second the held-out split.
for metric in ('loss', 'val_loss'):
    plt.plot(history[metric])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper right')

In [None]:
def prepare_input(text):
    """One-hot encode a whitespace-separated word sequence for the model.

    Args:
        text: Words separated by whitespace. They are looked up as-is in
            ``unique_word_index``, so they should already be normalised the
            same way as the training corpus.

    Returns:
        A float array of shape ``(1, w_len, len(unique_words))``. Row ``t``
        has a 1 in the column of the ``t``-th word. Rows for positions past
        the end of a short input, and rows for words that are not in the
        vocabulary, are left all-zero.
    """
    x = np.zeros((1, w_len, len(unique_words)))
    tokens = text.split()
    # Only `w_len` rows exist; keep the most recent words so that a longer
    # input cannot index past the window (previously an IndexError).
    recent = tokens[max(len(tokens) - w_len, 0):]
    for t, word in enumerate(recent):
        index = unique_word_index.get(word)
        # Words never seen in the corpus have no column; skip them instead of
        # aborting the prediction with a KeyError.
        if index is not None:
            x[0, t, index] = 1
    return x

def sample(preds, top_n=3):
    """Return the indices of the ``top_n`` largest entries of ``preds``.

    Args:
        preds: One-dimensional sequence of scores (e.g. the model's softmax
            output for one sample).
        top_n: How many indices to return.

    Returns:
        A list of integer indices into ``preds``, ordered from the highest
        score to the lowest. Contains fewer than ``top_n`` items when
        ``preds`` is shorter than that.
    """
    preds = np.asarray(preds).astype('float64')
    # Ranking is unaffected by rescaling, so the former log -> exp -> normalise
    # round trip was dropped: it changed nothing in the ordering and raised a
    # divide-by-zero RuntimeWarning whenever a probability was exactly 0.
    return heapq.nlargest(top_n, range(len(preds)), preds.take)

def predict_completion(text):
    """Return the single most likely next word for ``text``.

    The previous body was left over from a character-level model: it looked
    up an undefined ``indices_char`` table (NameError), sliced characters off
    ``text`` and looped until a ``' '`` token was produced, which a word
    vocabulary never yields. With a word-level model the completion of the
    current context is simply the top-ranked next word.

    Args:
        text: Whitespace-separated context words, as accepted by
            ``prepare_input``.

    Returns:
        The vocabulary word with the highest predicted probability.
    """
    x = prepare_input(text)
    preds = model.predict(x, verbose=0)[0]
    next_index = sample(preds, top_n=1)[0]
    return unique_words[next_index]

def pred_w(text, n=3):
    """Return the ``n`` vocabulary words the model ranks highest as the next word.

    ``text`` is encoded with ``prepare_input``, scored by the model, and the
    indices chosen by ``sample`` are mapped back to words via ``unique_words``.
    The result is ordered from most to least likely.
    """
    encoded = prepare_input(text)
    probabilities = model.predict(encoded, verbose=0)[0]
    return [unique_words[best] for best in sample(probabilities, n)]

In [None]:

# Sample quotes used to exercise the trained model; only their first few
# tokens are fed to it as context.
lines = [
    "It is the quality of one’s convictions that determines success, not the number of followers —Remus Lupin",
    "I’m going to keep going until I succeed—or I die. Don’t think I don’t know how this might end. I’ve known it for years. — Harry Potter",
    "That  more trouble than it’s worth. And quite honestly, I’ve had enough trouble for a lifetime.— Harry Potter",
    "We’re all human, are not we? Every human life is worth the same, and worth saving. Kingsley Shacklebolt",
    "‘Does it hurt?’ The childish question had escaped Harry's lips before he could stop it. ‘Dying? Not at all,’ said Sirius. ‘Quicker and easier than falling asleep.’",
    "He can run faster than Severus Snape confronted with shampoo.— Fred Weasley",
    "Words are, in my not-so-humble opinion, our most inexhaustible source of magic. Capable of both inflicting injury, and remedying it. ― Albus Dumbledore",
    "Not my daughter, you b*tch! ― Molly Weasley",
    "And Percy was shaking his brother, and Ron was kneeling beside them, and Fred's eyes stared without seeing, the ghost of his last laugh still etched upon his face.",
    "It is a curious thing, Harry, but perhaps those who are best suited to power are those who have never sought it. Those who, like you, have leadership thrust upon them, and take up the mantle because they must, and find to their own surprise that they wear it well. — Albus Dumbledore",
    "I've always wanted to use that spell. ― Minerva McGonagall",
    "Of course it is happening inside your head, Harry, but why on earth should that mean that it is not real? ― Albus Dumbledore",
    "Do not pity the dead, Harry. Pity the living, and, above all, those who live without love. ― Albus Dumbledore",
    "‘Why are they all staring?’ demanded Albus as he and Rose craned around to look at the other students. ‘Don’t let it worry you,’ said Ron. ‘It’s me. I’m extremely famous.’",
]

### Testing Next Word Prediction Model

In [None]:
# For every quote: lower-case and tokenise it, feed its first five tokens to
# the model as context, and print the five most likely next words.
for sentence in lines:
    print("original Sentence:", sentence, end='')
    leading_tokens = tknz.tokenize(sentence.lower())[:5]
    seq = " ".join(leading_tokens)
    print("\nSequence:", seq)
    print("Next possible words:", pred_w(seq, 5))