In [None]:
import csv

import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import files
from tensorflow.keras.layers import Embedding, Dot, Flatten, Input
from tensorflow.keras.models import Model

In [None]:
# Ask the user for a file through Colab's upload widget. The keys of the
# returned mapping are read in the next cell to locate the uploaded workbook.
uploaded = files.upload()

Saving Dataset.xlsx to Dataset (2).xlsx


In [None]:
# Treat the first uploaded entry as the workbook to load.
uploaded_names = list(uploaded.keys())
file_path = uploaded_names[0]

# header=None: the first spreadsheet row is data, not column labels, so the
# columns end up labelled 0, 1, 2, ...
data = pd.read_excel(
    file_path,
    header=None,
)

In [None]:
# Column 0 holds the token sequence, one token per spreadsheet row.
# Normalise it before anything is derived from it:
#   * dropna()     - blank cells are read as NaN, which cannot be ordered
#                    against strings by sorted(set(words)) further down;
#   * astype(str)  - numeric cells would otherwise stay int/float and break
#                    the string handling applied to words later on;
#   * str.lower()  - the lookup helper and the CSV export both lower-case
#                    words, so the vocabulary itself must be lower-case or
#                    mixed-case entries can never be found again.
words = data[0].dropna().astype(str).str.lower().tolist()

In [None]:
# Vocabulary = the distinct tokens, in sorted order so that the index
# assigned to each word is stable from run to run.
vocab = list(set(words))
vocab.sort()
vocab_size = len(vocab)

In [None]:
# Forward lookup: word -> its position in the sorted vocabulary.
word_to_idx = dict(zip(vocab, range(len(vocab))))
# Reverse lookup: position -> word.
idx_to_word = {position: token for token, position in word_to_idx.items()}

In [None]:
# Number of neighbours taken on EACH side of a target word when building
# (target, context) pairs.
window_size = 3
# Accumulator for (target index, context index) pairs; filled by the next cell.
training_data = []

In [None]:
# Build the skip-gram (target index, context index) pairs.
#
# The list is rebuilt from scratch here rather than appended to, so running
# this cell more than once no longer duplicates every pair.
#
# Only positions that have a full window on both sides are used as targets:
# the first and last `window_size` tokens appear as context words only.
# Pair order matches a nested loop over targets, then neighbours left to right.
training_data = [
    (word_to_idx[words[center]], word_to_idx[words[neighbor]])
    for center in range(window_size, len(words) - window_size)
    for neighbor in range(center - window_size, center + window_size + 1)
    if neighbor != center
]


In [None]:
# Split the pair list into two parallel int32 index vectors, one per model input.
target_words = np.asarray(
    [target_idx for target_idx, _ in training_data],
    dtype=np.int32,
)
context_words = np.asarray(
    [context_idx for _, context_idx in training_data],
    dtype=np.int32,
)

In [None]:
# Length of each learned word vector (used as the Embedding layer's output_dim).
embedding_dim = 6

In [None]:
# Each sample carries exactly one word index per input.
input_target = Input(shape=(1,))
input_context = Input(shape=(1,))

In [None]:
# Single lookup table shared by the target and the context input: one row of
# `embedding_dim` weights per vocabulary entry.
# `input_length` is intentionally not passed: it is deprecated in Keras 3
# (triggers a warning) and is redundant here because both Input layers already
# fix the sequence length to 1.
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, name='embedding')


In [None]:
# Look both indices up in the SAME embedding table, then drop the length-1
# sequence axis so each side is a flat vector per sample.
target_embedding = Flatten()(embedding_layer(input_target))
context_embedding = Flatten()(embedding_layer(input_context))


In [None]:
# Similarity score for a pair: dot product of the two flattened embeddings,
# taken along axis 1 (the embedding axis), giving one value per sample.
dot_product = Dot(axes=1)([target_embedding, context_embedding])

In [None]:
# Two-input model: (target index, context index) -> dot product of their embeddings.
model = Model(inputs=[input_target, input_context], outputs=dot_product)
# Mean squared error between that dot product and the label supplied at fit
# time, optimised with Adam.
model.compile(optimizer='adam', loss='mse')


In [None]:
# Train on observed pairs (label 1) PLUS randomly drawn negative pairs (label 0).
#
# Fitting only positive pairs against a constant label of 1 has a degenerate
# optimum: every word can take the same unit-norm vector and the loss is zero,
# so the embeddings end up carrying little information. Negative sampling
# gives the model something to push apart.
#
# For each positive (target, context) pair, NEGATIVES_PER_POSITIVE contexts are
# drawn uniformly from the vocabulary. A random draw can occasionally coincide
# with a genuine context word; that noise is accepted, as is usual for this
# technique.
NEGATIVES_PER_POSITIVE = 4
negative_rng = np.random.default_rng(42)  # fixed seed -> same negatives every run

negative_targets = np.repeat(target_words, NEGATIVES_PER_POSITIVE)
negative_contexts = negative_rng.integers(
    0, vocab_size, size=negative_targets.shape[0], dtype=np.int32
)

fit_targets = np.concatenate([target_words, negative_targets])
fit_contexts = np.concatenate([context_words, negative_contexts])
fit_labels = np.concatenate([
    np.ones(target_words.shape[0], dtype=np.float32),
    np.zeros(negative_targets.shape[0], dtype=np.float32),
])

# Keep the History object instead of letting its repr become the cell output.
history = model.fit([fit_targets, fit_contexts], fit_labels, epochs=100, verbose=0)

<keras.src.callbacks.History at 0x7a10148c9cc0>

In [None]:
# Pull the trained lookup table out of the layer named 'embedding'.
# get_weights() returns a list; element 0 is the embedding matrix, which is
# indexed by word index below (presumably shape (vocab_size, embedding_dim),
# matching the layer's input_dim/output_dim).
word_vectors = model.get_layer('embedding').get_weights()[0]

In [None]:
def get_word_vector(word):
    """Return the learned embedding for ``word``, or ``None`` if it is unknown.

    The word is first looked up exactly as given, so vocabulary entries that
    contain upper-case characters can be found. If that fails and ``word`` is
    a string, its lower-cased form is tried, which keeps lookups
    case-insensitive for a lower-case vocabulary.

    Args:
        word: The vocabulary entry to look up. Non-string values are matched
            exactly only; they no longer raise ``AttributeError``.

    Returns:
        The row of ``word_vectors`` at the word's index, or ``None`` when
        neither form of the word is present in ``word_to_idx``.
    """
    idx = word_to_idx.get(word)
    if idx is None and isinstance(word, str):
        # Case-insensitive fallback for a vocabulary stored in lower case.
        idx = word_to_idx.get(word.lower())
    if idx is None:
        return None
    return word_vectors[idx]

In [None]:
# Export one CSV row per vocabulary word: the word followed by its vector.
#
# * The header names every column (word, dim_0, dim_1, ...) so it has the same
#   number of fields as the data rows and the file parses as a regular table.
# * csv.writer quotes words that contain commas, quotes or newlines.
# * Every vocabulary entry is written exactly as stored, using its index
#   directly, so no word is skipped because of its letter case.
# * newline='' / lineterminator='\n' keep plain '\n' line endings on every
#   platform; utf-8 makes non-ASCII words portable.
output_file_path = 'word_vectors.csv'
with open(output_file_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['word'] + [f'dim_{i}' for i in range(word_vectors.shape[1])])
    for word in vocab:
        vector = word_vectors[word_to_idx[word]]
        # str() each component explicitly so NumPy scalars are written as
        # plain numbers.
        writer.writerow([word] + [str(component) for component in vector])


In [None]:
# Hand the CSV written in the previous cell to the browser via Colab's download helper.
files.download(output_file_path)
# Confirmation message naming the exported file.
print(f"Word vectors saved to {output_file_path}")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Word vectors saved to word_vectors.csv
