# In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical

# Toy training text: six short sentences, one per line.
corpus = """
Deep learning allows machines to learn from data.
It helps in predicting future outcomes.
Deep learning models are powerful and flexible.
Learning from examples is the key to generalization.
Models trained well can predict future words.
The model learns sequence patterns and predicts the next token.
"""

# Normalise to a single lower-cased line: every newline becomes a space and
# the leading/trailing whitespace left by the triple-quoted literal is removed.
corpus = corpus.lower()
corpus = " ".join(corpus.split("\n")).strip()

# Fit a word-level tokenizer on the corpus, passed as a single document.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([corpus])

# Forward (word -> id) table from the tokenizer and its inverse (id -> word).
word_index = tokenizer.word_index
index_word = dict(zip(word_index.values(), word_index.keys()))

# One slot more than the number of known words: the Keras Tokenizer reserves
# id 0 (never assigned to a word), and 0 is also the padding value used later.
vocab_size = 1 + len(word_index)

# The whole corpus as one flat list of word ids.
encoded_documents = tokenizer.texts_to_sequences([corpus])
encoded = encoded_documents[0]
# Build one training sample per corpus prefix of length >= 2:
# [w1, w2], [w1, w2, w3], ... up to the full corpus.
sequences = [encoded[:end] for end in range(2, len(encoded) + 1)]

# Left-pad every prefix with zeros to the length of the longest one so the
# samples stack into a rectangular array.
max_len = max(map(len, sequences))
sequences_padded = pad_sequences(sequences, maxlen=max_len, padding='pre')

# Last column is the word to predict; everything before it is the input.
sequences_padded = np.array(sequences_padded)
X, y = sequences_padded[:, :-1], sequences_padded[:, -1]

# One-hot targets, as required by the categorical cross-entropy loss.
y_cat = to_categorical(y, num_classes=vocab_size)

# Hyperparameters of the network.
embed_dim = 50
lstm_units = 100
# Number of tokens in each training input (columns of X). Kept as a
# module-level value because queries are padded to this length at
# prediction time.
input_length = X.shape[1]

# Embedding -> LSTM -> softmax over the vocabulary.
model = Sequential([
    # `input_length` is intentionally not passed to Embedding: the argument is
    # deprecated in Keras 3 (it only triggers a warning there) and the
    # sequence length is taken from the data the model is fitted on.
    Embedding(input_dim=vocab_size, output_dim=embed_dim),
    LSTM(lstm_units),
    Dense(vocab_size, activation='softmax'),
])

# Targets are one-hot vectors (y_cat), hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# verbose=0 suppresses the per-epoch training log.
model.fit(X, y_cat, epochs=120, batch_size=16, verbose=0)

def predict_next_word(model, tokenizer, text, max_input_len):
    """Return the word the model scores highest as the continuation of *text*.

    Args:
        model: Object whose ``predict(batch, verbose=0)`` returns, for each
            input row, one score per vocabulary id.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and an
            ``index_word`` mapping (id -> word), as the Keras ``Tokenizer``
            does.
        text: Prompt to continue. It is lower-cased and stripped before
            tokenisation.
        max_input_len: Length the encoded prompt is left-padded to before it
            is fed to the model.

    Returns:
        The predicted word, or ``"<unknown>"`` when the prompt contains no
        word known to the tokenizer or the predicted id has no word attached
        (for example the padding id 0).
    """
    token_ids = tokenizer.texts_to_sequences([text.lower().strip()])[0]
    if len(token_ids) == 0:
        # Nothing in the prompt is in the vocabulary; there is no input to
        # condition the prediction on.
        return "<unknown>"

    padded = pad_sequences([token_ids], maxlen=max_input_len, padding='pre')
    scores = model.predict(padded, verbose=0)
    best_id = int(np.argmax(scores, axis=-1)[0])

    # Decode with the vocabulary of the tokenizer that was passed in rather
    # than a module-level table, so the function stays correct when it is
    # called with a different tokenizer.
    return tokenizer.index_word.get(best_id, "<unknown>")

# Short prompts to run the trained model on.
examples = ["deep learning", "learning models", "predict future"]

print("\nPredictions:")
for prompt in examples:
    # Pad each prompt to the same input length the model was trained with.
    predicted = predict_next_word(model, tokenizer, prompt, input_length)
    print(f"Input: '{prompt}' -> Next word: '{predicted}'")