<a href="https://colab.research.google.com/github/Kritika0027/Data-Analytics/blob/main/Long_short_term_memory_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import numpy as np

In [2]:
# Toy corpus: each sentence contains exactly one "[MASK]" placeholder marking
# the position the model is asked to fill in.
texts = [
    "I love to [MASK] programming.",
    "This is a [MASK] example.",
    "Keras makes building models [MASK].",
]

In [3]:
# Swap the bracketed placeholder for a plain word so it is kept as a single
# token ("maskword") when the sentences are tokenized.
texts = [
    sentence.replace("[MASK]", "maskword")
    for sentence in texts
]

In [4]:
# Build the word -> integer-id vocabulary from the (placeholder-substituted) corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=texts)

In [5]:
# Vocabulary lookup learned by the tokenizer (word -> integer id).
word_index = tokenizer.word_index
# One extra slot on top of the known words, used as the size of the embedding
# table and of the softmax output.
vocab_size = 1 + len(word_index)
# Each sentence encoded as a list of integer word ids.
sequences = tokenizer.texts_to_sequences(texts)

In [6]:
# Build one training pair per sentence: the input is the token sequence with
# the placeholder token removed, the label is the id the model should emit.
input_sequences = []
target_words = []

maskword_index = word_index['maskword']

for sequence in sequences:
    if maskword_index not in sequence:
        # list.index would fail with a bare "x is not in list"; report the
        # actual problem (same exception type, clearer message).
        raise ValueError(
            f"no 'maskword' placeholder found in token sequence {sequence}"
        )
    # Only the first occurrence of the placeholder is located and removed.
    mask_idx = sequence.index(maskword_index)
    input_sequences.append(sequence[:mask_idx] + sequence[mask_idx+1:])
    # NOTE(review): the label is always the placeholder id itself, so every
    # sample has the same target and the trained model can only ever answer
    # "maskword". Predicting a real word needs ground-truth words for the
    # masked positions, which this corpus does not contain.
    target_words.append(maskword_index)

In [7]:
# Length of the longest context; every context is padded to this length so the
# batch forms a rectangular array.
max_len = len(max(input_sequences, key=len))
input_sequences = pad_sequences(input_sequences, maxlen=max_len)

In [8]:
# Labels as a NumPy array of integer class ids (one per training sentence).
target_words = np.asarray(target_words)


In [9]:
# Width of the learned word vectors.
embedding_dim = 10

# Embedding -> LSTM -> softmax over the vocabulary: the network reads the
# context tokens and outputs one probability per vocabulary id.
# `input_length` is intentionally not passed to Embedding: Keras 3 deprecates
# the argument (it only triggers a warning), and the sequence length is
# inferred from the data on the first call.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    LSTM(64),
    Dense(vocab_size, activation='softmax')
])




In [10]:
# Labels are integer class ids (not one-hot vectors), hence the "sparse"
# variant of categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Train on the padded contexts; the call is left as the cell's last expression
# so the returned History object is displayed.
model.fit(
    x=input_sequences,
    y=target_words,
    epochs=100,
    verbose=1,
)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.0000e+00 - loss: 2.6410
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 1.0000 - loss: 2.6324
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 1.0000 - loss: 2.6237
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 1.0000 - loss: 2.6148
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 1.0000 - loss: 2.6057
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 1.0000 - loss: 2.5961
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 1.0000 - loss: 2.5860
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 1.0000 - loss: 2.5752
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7cf77e185ae0>

In [12]:
# Query sentence, with the placeholder rewritten the same way as the training
# texts so the tokenizer maps it to the "maskword" id.
masked_sentence = "I love to [MASK] programming.".replace("[MASK]", "maskword")
# texts_to_sequences returns one id list per input text; exactly one was passed.
(masked_sequence,) = tokenizer.texts_to_sequences([masked_sentence])

In [13]:
# Position of the placeholder token inside the query.
masked_idx = masked_sequence.index(word_index['maskword'])
# Drop the placeholder, keep the surrounding context in order, then pad to the
# length used during training.
context_tokens = [
    token_id
    for position, token_id in enumerate(masked_sequence)
    if position != masked_idx
]
input_sequence = pad_sequences([context_tokens], maxlen=max_len)

In [14]:
# model.predict returns one row of vocabulary probabilities per input
# sequence; take the argmax of the first (only) row rather than of the
# flattened array, so the result stays a valid vocabulary id for any batch size.
probabilities = model.predict(input_sequence)
predicted_index = int(np.argmax(probabilities[0]))

# Index 0 has no entry in index_word (vocab_size reserves one slot beyond the
# known words, and padding uses 0), so fall back to a marker instead of
# raising KeyError if the model happens to pick it.
predicted_word = tokenizer.index_word.get(predicted_index, "<pad>")

print("Predicted word:", predicted_word)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
Predicted word: maskword
