In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [None]:
# Toy corpus for the CBOW demo. The literal is split across lines only for
# readability; implicit concatenation produces exactly the same string,
# including the "palying" spelling and the trailing space.
text_data = (
    "I love to eat pizza. "
    "Cat and dogs are palying.while the dog is barking on strangers ,"
    "this is the CBOW model implementation example "
)


In [None]:
# Fit a word-level tokenizer on the single-document corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text_data])

# Vocabulary size plus one: Keras word indices start at 1, which leaves
# index 0 free for padding.
total_words = 1 + len(tokenizer.word_index)

In [None]:
# Number of context words taken on each side of the target word.
window_size = 2

# Accumulators for the (context, target) training pairs.
input_sequences, labels = [], []

In [None]:
# Encode the corpus as integer word ids. One input text yields exactly one
# id sequence, which is unpacked directly.
[token_list] = tokenizer.texts_to_sequences([text_data])

In [None]:
# Build the CBOW training pairs: for every position that has a full window on
# both sides, the context is the `window_size` ids before and after it and the
# label is the id at that position.
#
# The accumulators are reset here so that re-running this cell rebuilds the
# pairs instead of appending a second copy of every sample.
input_sequences = []
labels = []
for i in range(window_size, len(token_list) - window_size):
    # Left neighbours followed by right neighbours; the centre id is excluded.
    context = token_list[i - window_size:i] + token_list[i + 1:i + window_size + 1]
    target = token_list[i]
    input_sequences.append(context)
    labels.append(target)

In [None]:
# Context windows as an integer matrix with one row per training pair.
X = np.asarray(input_sequences)

# One-hot encode the target ids over the full vocabulary (index 0 included).
y = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [None]:
# Fix the random seed so weight initialisation, dropout and batch shuffling
# are repeatable across "Restart & Run All".
tf.keras.utils.set_random_seed(42)

# CBOW-style classifier: embed the 2 * window_size context ids, average the
# embeddings, and predict the centre word with a softmax over the vocabulary.
model = Sequential([
    # Declare the input shape explicitly rather than through Embedding's
    # deprecated `input_length` argument.
    tf.keras.Input(shape=(2 * window_size,), dtype="int32"),
    Embedding(total_words, 10),
    GlobalAveragePooling1D(),
    Dropout(0.2),  # Adding dropout for regularization
    Dense(total_words, activation='softmax'),
])



In [None]:
# One-hot targets pair with categorical cross-entropy; accuracy is the only
# tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 100 epochs with the per-epoch progress log enabled. The call is
# the cell's last expression, so the returned History object is displayed.
model.fit(x=X, y=y, epochs=100, verbose=1)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.0500 - loss: 3.1354
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.1000 - loss: 3.1323
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.0500 - loss: 3.1332
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.1000 - loss: 3.1315
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.1000 - loss: 3.1271
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - accuracy: 0.0500 - loss: 3.1285
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step - accuracy: 0.1500 - loss: 3.1258
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.2000 - loss: 3.1258
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x792fd824da80>

In [None]:
def predict_word(context_words):
    """Predict the centre word for a string of context words.

    Args:
        context_words: A string of context words. The model expects
            ``2 * window_size`` of them. Words unknown to ``tokenizer`` are
            dropped, shorter inputs are left-padded with 0, and longer inputs
            are cut down to ``2 * window_size`` ids by ``pad_sequences``.

    Returns:
        The vocabulary word with the highest predicted probability.
    """
    context_seq = tokenizer.texts_to_sequences([context_words])[0]
    context_seq = pad_sequences([context_seq], maxlen=2 * window_size, padding='pre')
    # A single sample goes in, so take the first (only) row of probabilities.
    predicted_probs = model.predict(context_seq)[0]
    # Index 0 is the padding slot and has no entry in tokenizer.index_word.
    # Excluding it from the argmax avoids a KeyError if the network happens
    # to put the most mass there.
    predicted_index = int(np.argmax(predicted_probs[1:])) + 1
    predicted_word = tokenizer.index_word[predicted_index]
    return predicted_word

In [None]:
# Provide window_size (= 2) words from each side of the missing target word,
# i.e. 2 * window_size context words in total, in corpus order.
context_words = "is the model implementation"
predicted_word = predict_word(context_words)
print("Predicted target word:",predicted_word)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
Predicted target word: cbow
