<a href="https://colab.research.google.com/github/Domaakshithareddy/next-word-prediction/blob/main/Next_Word_Prediction_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Next Word Prediction:

### Importing The Required Libraries:

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
import pickle
import numpy as np
import os

In [None]:
# Read the corpus line by line. The context manager closes the file handle
# as soon as the lines are in memory (the original left it open).
with open("metamorphosis_clean.txt", "r", encoding="utf8") as file:
    lines = list(file)

print("The First Line: ", lines[0])
print("The Last Line: ", lines[-1])

The First Line:  ﻿One morning, when Gregor Samsa woke from troubled dreams, he found

The Last Line:  first to get up and stretch out her young body.


### Cleaning the data:

In [None]:
# Join every line into one string. A single join is enough: the original
# loop recomputed this exact join once per line.
data = ' '.join(lines)

# Strip newlines, carriage returns and the byte-order mark character (\ufeff)
data = data.replace('\n', '').replace('\r', '').replace('\ufeff', '')
data[:360]

'One morning, when Gregor Samsa woke from troubled dreams, he found himself transformed in his bed into a horrible vermin.  He lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightly domed and divided by arches into stiff sections.  The bedding was hardly able to cover it and seemed ready to slide off any moment.'

In [None]:
import string

# Map every punctuation character to a space so neighbouring words stay
# separated (e.g. "armour-like" -> "armour like").
# The stray trailing backslash after this statement was removed: it joined the
# next line onto this one and made the cell a SyntaxError.
translator = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
new_data = data.translate(translator)

new_data[:500]

'One morning  when Gregor Samsa woke from troubled dreams  he found himself transformed in his bed into a horrible vermin   He lay on his armour like back  and if he lifted his head a little he could see his brown belly  slightly domed and divided by arches into stiff sections   The bedding was hardly able to cover it and seemed ready to slide off any moment   His many legs  pitifully thin compared with the size of the rest of him  waved about helplessly as he looked    What s happened to me   he'

In [None]:
# Keep only the first occurrence of each whitespace-separated token, in the
# order it first appears. dict.fromkeys preserves insertion order and does the
# membership test in constant time, replacing the quadratic `not in list` scan.
# NOTE(review): after this step every token appears once, so the word pairs
# built later only reflect first-occurrence adjacency -- confirm this is intended.
z = list(dict.fromkeys(data.split()))

data = ' '.join(z)
data[:500]

'One morning, when Gregor Samsa woke from troubled dreams, he found himself transformed in his bed into a horrible vermin. He lay on armour-like back, and if lifted head little could see brown belly, slightly domed divided by arches stiff sections. The bedding was hardly able to cover it seemed ready slide off any moment. His many legs, pitifully thin compared with the size of rest him, waved about helplessly as looked. "What\'s happened me?" thought. It wasn\'t dream. room, proper human room altho'

### Tokenization:

In [None]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

# saving the tokenizer for predict function
# (context manager guarantees the pickle is flushed and the handle is closed)
with open('tokenizer1.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

# Encode the whole text as one list of integer word ids
sequence_data = tokenizer.texts_to_sequences([data])[0]
sequence_data[:10]

[17, 53, 293, 2, 18, 729, 135, 730, 294, 8]

In [None]:
# Keras Tokenizer word ids start at 1, so one extra slot is needed for id 0
known_word_count = len(tokenizer.word_index)
vocab_size = known_word_count + 1
print(vocab_size)

2617


In [None]:
# Slide a two-token window over the encoded text:
# every row is [current word id, following word id].
sequences = [
    [current_id, following_id]
    for current_id, following_id in zip(sequence_data, sequence_data[1:])
]

print("The Length of sequences are: ", len(sequences))
sequences = np.array(sequences)
sequences[:10]

The Length of sequences are:  3889


array([[ 17,  53],
       [ 53, 293],
       [293,   2],
       [  2,  18],
       [ 18, 729],
       [729, 135],
       [135, 730],
       [730, 294],
       [294,   8],
       [  8, 731]])

In [None]:
# First element of each pair is the input word id, second is the word id to predict
X = np.array([pair[0] for pair in sequences])
y = np.array([pair[1] for pair in sequences])

In [None]:
# Show the first five input ids next to the ids that should follow them
preview_inputs = X[:5]
preview_targets = y[:5]
print("The Data is: ", preview_inputs)
print("The responses are: ", preview_targets)

The Data is:  [ 17  53 293   2  18]
The responses are:  [ 53 293   2  18 729]


In [None]:
# One-hot encode the target ids. They are read from column 1 of `sequences`
# (the same values `y` was built from) instead of from `y` itself, so running
# this cell a second time does not try to encode an already encoded array.
y = to_categorical(sequences[:, 1], num_classes=vocab_size)
y[:5]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

### Creating the Model:

In [None]:
# Layer sizes used by the network below
EMBEDDING_DIM = 10
HIDDEN_UNITS = 1000

# Embedding -> two stacked LSTMs -> dense hidden layer -> softmax over the vocabulary.
# The first LSTM returns its full output sequence so it can feed the second one.
model = Sequential([
    Embedding(vocab_size, EMBEDDING_DIM, input_length=1),
    LSTM(HIDDEN_UNITS, return_sequences=True),
    LSTM(HIDDEN_UNITS),
    Dense(HIDDEN_UNITS, activation="relu"),
    Dense(vocab_size, activation="softmax"),
])



In [None]:
# Build with an unspecified batch dimension (None): each sample is one word id,
# but training uses batches larger than 1, so the batch size must not be pinned.
model.build(input_shape=(None, 1))
model.summary()

### Callbacks:

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Learning-rate schedule driven by the training loss: multiply the rate by
# `factor` after `patience` epochs without improvement, never going below `min_lr`.
reduce = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=3,
    min_lr=0.0001,
    verbose=1,
)

### Compile The Model:

In [None]:
# Targets are one-hot vectors, hence categorical cross-entropy
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

### Fit The Model:

In [None]:
# Train with the learning-rate callback, then persist the final weights
training_options = {"epochs": 150, "batch_size": 64, "callbacks": [reduce]}
model.fit(X, y, **training_options)

model.save("nextword_final.h5")
print("Model saved after all epochs!")

Epoch 1/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 297ms/step - loss: 7.8720 - learning_rate: 0.0010
Epoch 2/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 296ms/step - loss: 7.8622 - learning_rate: 0.0010
Epoch 3/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 314ms/step - loss: 7.8093 - learning_rate: 0.0010
Epoch 4/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 307ms/step - loss: 7.6248 - learning_rate: 0.0010
Epoch 5/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 338ms/step - loss: 7.4221 - learning_rate: 0.0010
Epoch 6/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 302ms/step - loss: 7.2399 - learning_rate: 0.0010
Epoch 7/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 310ms/step - loss: 7.1058 - learning_rate: 0.0010
Epoch 8/150
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 300ms/step - loss: 7.0129 - learni



Model saved after all epochs!


In [None]:
# Importing the Libraries
from tensorflow.keras.models import load_model
import numpy as np
import pickle

# Load the model and tokenizer
model = load_model('nextword_final.h5')

# NOTE: unpickling can execute arbitrary code -- only load tokenizer files
# that were written by this notebook, never files from an untrusted source.
with open('tokenizer1.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

def Predict_Next_Words(model, tokenizer, text):
    """
    Predict the next word using the trained model and tokenizer.

    Args:
        model: Object whose ``predict(array)`` returns one row of scores per
            input row, where the column index is a tokenizer word id.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and an
            ``index_word`` mapping (word id -> word).
        text: Seed text. Only its last in-vocabulary token is fed to the model.

    Returns:
        The predicted word, or None when the text has no known token, the
        predicted id has no word, or prediction raised an error. The outcome
        is also printed, as before.
    """
    try:
        token_ids = tokenizer.texts_to_sequences([text])[0]

        # Handle the case where the word is not in the tokenizer
        if not token_ids:
            print("Word not in vocabulary!")
            return None

        # Feed exactly one token: a "word" such as "armour-like" can tokenize
        # to several ids, and the model is used with one-token inputs.
        sequence = np.array(token_ids[-1:]).reshape(1, -1)

        # Index of the highest-scoring vocabulary entry
        preds = int(np.argmax(model.predict(sequence), axis=-1)[0])

        # Direct id -> word lookup instead of scanning word_index
        predicted_word = tokenizer.index_word.get(preds)

        if not predicted_word:
            print("Prediction failed: No matching word found!")
            return None

        print(f"Predicted word: {predicted_word}")
        return predicted_word

    except Exception as e:
        # Best-effort helper: report the problem instead of propagating it
        print(f"Error in prediction: {e}")
        return None

# Interactive loop for user input
while True:
    try:
        text = input("Enter your line: ")
    except (KeyboardInterrupt, EOFError):
        # Interrupting the kernel (or a closed stdin) ends the session cleanly
        print("Ending The Program.....")
        break

    if text.strip().lower() == "stop the script":
        print("Ending The Program.....")
        break

    # split() with no argument ignores leading/trailing/repeated whitespace.
    # split(" ") produced an empty last "word" whenever the line ended with a
    # space, which was then reported as not being in the vocabulary.
    words = text.split()
    if not words:
        print("Please enter at least one word.")
        continue

    try:
        last_word = words[-1]  # Extract the last word
        Predict_Next_Words(model, tokenizer, last_word)  # Predict the next word
    except Exception as e:
        print(f"Error: {e}")




Enter your line: at the dull
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 419ms/step
Predicted word: weather
Enter your line: collection of textile
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
Predicted word: samples
Enter your line: what a strenuous
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Predicted word: career
Enter your line: i am
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Predicted word: less
Enter your line: i am eating
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Predicted word: prepared
Enter your line: i am doing
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Predicted word: business
Enter your line: i love
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Predicted word: strongly
Enter your line: Artificial Inteligence is the 
Word not in vocabulary!
Enter your line: i am cooking
[1m1/1[0m [32m━━━━

KeyboardInterrupt: Interrupted by user