<a href="https://colab.research.google.com/github/Disciplined-22/LSTM_MODEL_PREDICT_WORD_1/blob/main/predict_tensor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np

In [2]:
# The Tokenizer class in Keras is used for preparing text so it can be used in neural network models.
tokenizer = Tokenizer()

# Read the training text from Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive -- no mount
# call is visible in this notebook; confirm before a fresh run.
# The context manager closes the file handle as soon as the read finishes
# (a bare open(...).read() leaves it open), and the explicit encoding keeps
# decoding independent of the runtime's locale.
with open('/content/drive/MyDrive/NLP_models/tensorflow_lstm/jim_rohn.txt', encoding='utf-8') as corpus_file:
    data = corpus_file.read()

# The text data is converted to lowercase and split into lines; each line is
# treated as one training text.
corpus = data.lower().split("\n")

# The tokenizer is fit on the corpus. This updates the internal vocabulary based on the list of texts.
tokenizer.fit_on_texts(corpus)

# Vocabulary size plus one: word indices start at 1, so index 0 is left free
# (it is the value pad_sequences fills with later on).
total_words = len(tokenizer.word_index) + 1

# Printing the word index gives you a dictionary of words and their assigned indices.
print(tokenizer.word_index)

# Printing total_words gives the number of unique words in the corpus plus one.
print(total_words)


{'the': 1, 'you': 2, 'to': 3, 'is': 4, 'your': 5, 'a': 6, 'make': 7, "don't": 8, 'of': 9, 'not': 10, 'be': 11, 'will': 12, 'success': 13, 'it': 14, 'what': 15, 'in': 16, 'for': 17, 'more': 18, 'life': 19, 'better': 20, 'only': 21, 'are': 22, 'do': 23, 'today': 24, 'and': 25, 'that': 26, 'get': 27, "it's": 28, 'by': 29, 'become': 30, 'future': 31, 'can': 32, 'yourself': 33, 'wish': 34, 'education': 35, 'count': 36, 'create': 37, 'about': 38, 'self': 39, 'goals': 40, 'things': 41, 'on': 42, 'just': 43, 'work': 44, 'person': 45, 'were': 46, 'between': 47, 'our': 48, 'living': 49, 'days': 50, 'best': 51, 'way': 52, 'getting': 53, 'have': 54, 'destination': 55, 'easier': 56, 'key': 57, 'discipline': 58, 'bridge': 59, 'major': 60, 'difference': 61, 'set': 62, 'goal': 63, 'tomorrow': 64, 'formal': 65, 'fortune': 66, 'less': 67, 'failure': 68, 'predict': 69, 'determines': 70, 'learn': 71, 'reflection': 72, 'them': 73, 'wisely': 74, 'wait': 75, 'investment': 76, 'income': 77, 'place': 78, 'give

In [3]:
# Collect every training prefix: for each corpus line, all leading token
# slices of length 2 up to the full line length.
input_sequences = []

for text in corpus:
    # Encode the line as a list of integer word indices
    encoded = tokenizer.texts_to_sequences([text])[0]

    # Lines with fewer than two tokens contribute nothing (empty range)
    input_sequences.extend(
        encoded[:end] for end in range(2, len(encoded) + 1)
    )

# Left-pad every prefix with zeros up to the length of the longest one
max_sequence_len = max(len(seq) for seq in input_sequences)
padded = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
input_sequences = np.array(padded)

# Predictors are all columns but the last; the label is the last column
xs = input_sequences[:, :-1]
labels = input_sequences[:, -1]

# Convert the integer labels to one-hot vectors over the vocabulary to get the ys
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [9]:
# Build, compile and train the next-word prediction model.
# Sequential, Embedding, LSTM, Dense, Bidirectional and Adam all come from the
# imports cell at the top of the notebook, so they are not re-imported here.

# Defining the Model
model = Sequential()

# Embedding layer: vocabulary of total_words entries, 100-dimensional vectors,
# input length max_sequence_len-1 (the width of xs, i.e. each padded prefix
# without its final label token)
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))

# To try a bidirectional variant, replace the LSTM layer below with:
# model.add(Bidirectional(LSTM(150)))

# Adding a unidirectional LSTM layer with 200 units
model.add(LSTM(200))

# Adding a Dense output layer with total_words units and softmax activation,
# giving one probability per vocabulary index
model.add(Dense(total_words, activation='softmax'))

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# Train on the full set of prefixes; no validation split is used here.
history = model.fit(xs, ys, epochs=100, verbose=1)

# Show the layer-by-layer architecture (print(model) only shows the object repr)
model.summary()




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [10]:
# Generate text by repeatedly predicting the most likely next word and
# appending it to the seed.

# Setting the initial seed text and specifying the number of words to predict
seed_text = "History is the proof the more you give"
next_words = 10

# Generating the next words in the sequence
for _ in range(next_words):
    # Converting the seed text to a sequence of tokens using the tokenizer
    token_list_1 = tokenizer.texts_to_sequences([seed_text])[0]

    # Padding the token sequence to match the model's input length
    token_list_2 = pad_sequences([token_list_1], maxlen=max_sequence_len-1, padding='pre')

    # Predicting the next word: argmax over the softmax output gives a
    # one-element array (batch of one); verbose=0 keeps the per-call progress
    # bar out of the cell output.
    predicted = np.argmax(model.predict(token_list_2, verbose=0), axis=-1)
    predicted_index = int(predicted[0])

    # Direct index -> word lookup via the tokenizer's reverse mapping instead
    # of scanning the whole word_index dictionary for every generated word.
    output_word = tokenizer.index_word.get(predicted_index, "")

    # Index 0 has no word (it is the padding value). Appending an empty word
    # would only add a trailing space and leave the tokenized seed unchanged,
    # so every later step would repeat the same prediction -- stop instead.
    if not output_word:
        break

    # Appending the predicted word to the seed text for the next iteration
    seed_text += " " + output_word

# Displaying the generated sequence
print(seed_text)


History is the proof the more you give the more you receive into your life to be grateful
