<a href="https://colab.research.google.com/github/yuvrajdevrukhkar/DLLab/blob/main/prac5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary packages
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Train a tiny Continuous Bag-of-Words (CBOW) model on one sentence and print
# the learned embedding vector of a single word.

# Example text data
text = "continuous bag of words model is a simple and effective method for word embeddings"

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

# Create word-to-index and index-to-word mapping
word2idx = tokenizer.word_index
idx2word = {v: k for k, v in word2idx.items()}

# Convert text to a flat list of word indices
sequences = tokenizer.texts_to_sequences([text])[0]

# Generate training data
vocab_size = len(word2idx) + 1  # Add 1 for padding token
context_window = 2  # Number of words taken on each side of the target
X, y = [], []

# Only positions with a full window on both sides are used, so every context
# has exactly 2 * context_window entries.
for i in range(context_window, len(sequences) - context_window):
    context = sequences[i - context_window:i] + sequences[i + 1:i + context_window + 1]
    target = sequences[i]
    X.append(context)
    y.append(target)

# Fail early with a clear message instead of an opaque error from inside Keras
# when the text is too short to yield a single (context, target) pair.
if not X:
    raise ValueError(
        f"Text has {len(sequences)} tokens; at least {2 * context_window + 1} "
        f"are required to build a CBOW sample with context_window={context_window}."
    )

X = np.array(X)
y = np.array(y)

# Pad the sequences to the same length. Every window built above already has
# max_sequence_length entries, so the values are left unchanged.
max_sequence_length = context_window * 2
X = pad_sequences(X, maxlen=max_sequence_length)

# Define the CBOW model: embed the context words, average them, then predict
# the target word over the whole vocabulary.
# The input shape is declared with keras.Input rather than the `input_length`
# argument of Embedding, which newer Keras releases deprecate.
embedding_layer = keras.layers.Embedding(input_dim=vocab_size, output_dim=100)
model = keras.Sequential([
    keras.Input(shape=(max_sequence_length,), dtype="int32"),
    embedding_layer,
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(vocab_size, activation='softmax')
])

# Compile the model (targets are integer word indices, hence the sparse loss)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Train the model
model.fit(X, y, epochs=100, verbose=0)

# Extract the word embeddings. The layer is referenced by name instead of by
# its position in model.layers so the lookup does not depend on layer order.
word_embeddings = embedding_layer.get_weights()[0]

# Get the word embedding for a specific word (raises KeyError if the word is
# not in the tokenizer vocabulary)
word_to_lookup = 'continuous'
word_idx = word2idx[word_to_lookup]
embedding = word_embeddings[word_idx]

print(f'Embedding for "{word_to_lookup}": {embedding}')


Embedding for "continuous": [ 0.02310492  0.06932787 -0.12654205  0.13365555  0.12372326  0.08998462
  0.07651193  0.12454349  0.10868638 -0.13465185  0.10724106 -0.05694027
 -0.0464944   0.04929291  0.12423536 -0.10129827 -0.13742745  0.089001
 -0.14689118  0.09035106  0.11407502 -0.07948466  0.11442833 -0.16235426
 -0.00892004 -0.07672003 -0.08255977 -0.09554536  0.10237414  0.15787858
  0.12563555 -0.09183277 -0.04440211 -0.08422723  0.0920196   0.05433542
  0.07938681  0.06140272  0.10778797 -0.15336682 -0.10075109  0.03808346
 -0.06810281  0.12006819 -0.08440902 -0.12546875  0.07539089  0.1053844
  0.11789857 -0.13430384  0.11090727  0.14952749 -0.06024612 -0.05517796
  0.1570728   0.09841972 -0.12943293 -0.10137805 -0.13179407 -0.08113645
 -0.16438931  0.08654918 -0.10237004  0.02611706  0.06202988 -0.07410015
  0.09887502 -0.13151161 -0.12488033 -0.16481675 -0.0675611   0.06416555
 -0.03203936  0.09313873 -0.08214913  0.14798562  0.08282845 -0.06860059
 -0.12804443  0.1471337  -