In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM,Dense

In [3]:
# Toy training corpus: five short sentences.
corpus = [
    "I love machine learning",
    "I love deep learning",
    "I love artificial Intelligence",
    "Deep learning is powerful",
    "Artificial intelligence is the future",
]

# Learn the word -> integer index mapping from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# One extra slot on top of the learned words: index 0 is what pad_sequences
# fills with below, so the class axis must have room for it.
total_words = len(tokenizer.word_index) + 1
print(f"Vocabulary Size:{total_words}")

# Expand every sentence into its growing prefixes (first 2 tokens, first 3
# tokens, ... whole sentence); the last token of each prefix is the target.
input_sequences = []
for sentence in corpus:
    token_list = tokenizer.texts_to_sequences([sentence])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Left-pad all prefixes to the length of the longest one so the target word
# always sits in the final column.
max_length = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_length,
    padding='pre',
)

# Features: everything but the last column. Labels: last column, one-hot.
X = input_sequences[:, :-1]
y = tf.keras.utils.to_categorical(
    input_sequences[:, -1],
    num_classes=total_words,
)

print(f"Input shape:{X.shape},Output shape:{y.shape}")

Vocabulary Size:12
Input shape:(16, 4),Output shape:(16, 12)


In [9]:
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that initialisation (and the shuffling done during training) is repeatable
# on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Next-word classifier: embedding -> two stacked LSTMs -> dense head with a
# softmax over the vocabulary (total_words classes).
model = Sequential([
    # Declaring the input explicitly replaces Embedding's `input_length`
    # argument, which Keras 3 deprecates, and lets the model be built right
    # away so summary() can report output shapes and parameter counts.
    # Each sample is max_length - 1 token ids (the prefix without its target).
    tf.keras.Input(shape=(max_length - 1,)),
    Embedding(input_dim=total_words, output_dim=50),
    # First LSTM returns the full sequence so the second LSTM has a
    # time dimension to consume.
    LSTM(100, return_sequences=True),
    LSTM(100),
    Dense(100, activation='relu'),
    Dense(total_words, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [11]:
# Train on the full dataset and keep the returned History object so the
# per-epoch loss/accuracy stay available (history.history) instead of the cell
# ending in a bare object repr.
# NOTE(review): two epochs is only a smoke test -- the recorded run ends at
# 0.25 accuracy -- so raise `epochs` before trusting any prediction.
history = model.fit(X, y, epochs=2, verbose=1)

Epoch 1/2
1/1 ━━━━━━━━━━━━━━━━━━━━ 15s 15s/step - accuracy: 0.1875 - loss: 2.4847
Epoch 2/2
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 531ms/step - accuracy: 0.2500 - loss: 2.4812


<keras.src.callbacks.history.History at 0x1a433cda360>

In [13]:
def predict_next_word(model, tokenizer, seed_text: str, max_length: int) -> str:
    """Return the word the model considers most likely to follow ``seed_text``.

    Args:
        model: Object exposing ``predict(batch, verbose=...)`` that returns one
            row of per-class scores for each input row.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences`` and an
            ``index_word`` mapping (integer id -> word).
        seed_text: Prompt whose continuation is wanted.
        max_length: Padded n-gram length used for training, target included;
            the model input is therefore ``max_length - 1`` tokens wide.

    Returns:
        The predicted word, or ``""`` when the winning class id has no word
        attached (for example the padding id 0).
    """
    token_ids = tokenizer.texts_to_sequences([seed_text])[0]
    # Left-pad short prompts; for long prompts drop tokens from the front so
    # the most recent words are the ones the model sees.
    model_input = pad_sequences(
        [token_ids],
        maxlen=max_length - 1,
        padding='pre',
        truncating='pre',
    )
    # verbose=0 keeps the progress bar out of the cell output.
    scores = model.predict(model_input, verbose=0)
    # A single prompt was submitted, so row 0 holds its class scores; convert
    # to a plain int so it can be used as a dictionary key.
    predicted_index = int(np.argmax(scores[0]))
    # Direct reverse lookup instead of scanning word_index for a match.
    return tokenizer.index_word.get(predicted_index, "")

# Smoke-test the predictor on a short prompt and show the suggested word.
seed_text = "I love"
next_word = predict_next_word(
    model=model,
    tokenizer=tokenizer,
    seed_text=seed_text,
    max_length=max_length,
)
print(f"Next word prediction for '{seed_text}':{next_word}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step   
Next word prediction for 'I love':learning
