In [1]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Toy training corpus: a short definition of machine learning.
# Assembled from one literal per line so the leading/trailing newlines and the
# trailing spaces on the first two lines are visible in the source.
paragraph = (
    "\n"
    "Machine learning is a method of data analysis that automates analytical model building. \n"
    "It is a branch of artificial intelligence based on the idea that systems can learn from data, \n"
    "identify patterns and make decisions with minimal human intervention.\n"
)


In [3]:
# Learn the vocabulary from the corpus. The vocabulary size is one larger than
# the number of known words, leaving room for index 0 (the padding value used
# when the sequences are padded below).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([paragraph])
word_index = tokenizer.word_index
total_words = 1 + len(word_index)

# Build the training prefixes: for every line of the corpus, emit each leading
# slice of its token ids that holds at least two tokens (context + target).
encoded_lines = tokenizer.texts_to_sequences(paragraph.split('\n'))
input_sequences = [
    ids[:end]
    for ids in encoded_lines
    for end in range(2, len(ids) + 1)
]

# Left-pad every prefix up to the length of the longest one.
max_seq_len = max(map(len, input_sequences))
input_sequences = pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre')

# The last column is the word to predict; everything before it is the input.
X = input_sequences[:, :-1]
y = np.array(input_sequences[:, -1])


In [4]:
# Hyperparameters of the next-word model.
EMBEDDING_DIM = 10
LSTM_UNITS = 150

# Embedding -> LSTM -> softmax over the whole vocabulary.
model = Sequential()
# `input_length` is deprecated in Keras 3 (it is ignored with a warning), so
# the input shape is supplied through the explicit build() call below instead.
model.add(Embedding(total_words, EMBEDDING_DIM))
model.add(LSTM(LSTM_UNITS))
model.add(Dense(total_words, activation='softmax'))

# Targets are integer word ids, hence the sparse variant of cross-entropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create the weights now so that model.summary() can report concrete output
# shapes and parameter counts before the first call to fit().
model.build(input_shape=(None, max_seq_len - 1))




In [5]:
# Print the layer-by-layer summary of the model.
model.summary()

In [6]:
# Train on all prefixes; verbose=0 keeps the per-epoch log out of the output.
# The returned History is bound to a name so the cell does not echo an object
# repr (which embeds a memory address) and the recorded metrics stay available.
history = model.fit(X, y, epochs=300, verbose=0)


<keras.src.callbacks.history.History at 0x15ad3cea550>

In [7]:
def predict_next_word(seed_text, max_seq_len):
    """Predict the word most likely to follow ``seed_text``.

    Args:
        seed_text: Text to continue. It is encoded with the module-level
            ``tokenizer``.
        max_seq_len: Length of the padded training sequences; the model input
            is left-padded to ``max_seq_len - 1`` token ids.

    Returns:
        The predicted word, or ``""`` when the highest-scoring index has no
        word attached to it in the tokenizer's vocabulary.
    """
    token_ids = tokenizer.texts_to_sequences([seed_text])[0]
    model_input = pad_sequences([token_ids], maxlen=max_seq_len - 1, padding='pre')
    predicted_probs = model.predict(model_input, verbose=0)
    # Convert to a plain int so the dictionary lookup below uses a native key.
    predicted_index = int(np.argmax(predicted_probs, axis=-1)[0])

    # The tokenizer already maintains the reverse (index -> word) mapping, so a
    # direct lookup replaces the linear scan over word_index.
    return tokenizer.index_word.get(predicted_index, "")

# Demo: feed the first corpus sentence without its final word and print the
# word the trained model proposes as the continuation.
seed = "Machine learning is a method of data analysis that automates analytical model"
next_word = predict_next_word(seed_text=seed, max_seq_len=max_seq_len)
print(f"Input: '{seed}' -> Predicted Next Word: '{next_word}'")


Input: 'Machine learning is a method of data analysis that automates analytical model' -> Predicted Next Word: 'building'
