In [1]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec

In [2]:
# Toy corpus: four short review-style sentences that the embedding model
# below is trained on.
sentences = [
    "I like this product",
    "This product is great",
    "I love this item",
    "This is a good product",
]

In [3]:
# Normalise case and split on whitespace: one list of tokens per sentence.
tokenized_sentences = [text.lower().split() for text in sentences]
tokenized_sentences

[['i', 'like', 'this', 'product'],
 ['this', 'product', 'is', 'great'],
 ['i', 'love', 'this', 'item'],
 ['this', 'is', 'a', 'good', 'product']]

In [4]:
# Train a small CBOW model (sg=0) with 3-dimensional vectors on the toy corpus.
# Training is stochastic: seed=1 (gensim's documented default, made explicit)
# together with workers=1 removes thread-scheduling jitter so that
# Restart & Run All yields the same vectors on every run.
model = Word2Vec(
    sentences=tokenized_sentences,
    vector_size=3,
    window=2,
    min_count=1,  # keep every word; the corpus is tiny
    sg=0,
    seed=1,
    workers=1,
)

In [5]:
# Show the list of vocabulary words the trained model holds vectors for.
model.wv.index_to_key

['this', 'product', 'is', 'i', 'good', 'a', 'item', 'love', 'great', 'like']

In [6]:
# Look up the learned 3-dimensional vector for the word "product".
model.wv['product']

array([ 0.3003091 , -0.31009832, -0.23722696], dtype=float32)

In [7]:
# Look up the learned 3-dimensional vector for the word "good".
model.wv['good']

array([-0.15122044,  0.21846838, -0.16200535], dtype=float32)

In [8]:
# Dump every vocabulary word together with its embedding, one per line.
print("Individual Word Vectors:\n")

for word in model.wv.index_to_key:
    vector = model.wv[word]
    print({word: vector})

Individual Word Vectors:

{'this': array([-0.01787424,  0.00788105,  0.17011166], dtype=float32)}
{'product': array([ 0.3003091 , -0.31009832, -0.23722696], dtype=float32)}
{'is': array([ 0.21529575,  0.2990996 , -0.16718094], dtype=float32)}
{'i': array([-0.12544572,  0.24601682, -0.05111571], dtype=float32)}
{'good': array([-0.15122044,  0.21846838, -0.16200535], dtype=float32)}
{'a': array([-0.06053392,  0.09588599,  0.03306246], dtype=float32)}
{'item': array([-0.27617383, -0.3149606 ,  0.24372554], dtype=float32)}
{'love': array([0.16900873, 0.22525644, 0.02542885], dtype=float32)}
{'great': array([ 0.21169634, -0.1135122 , -0.03154671], dtype=float32)}
{'like': array([ 0.19228578, -0.25072125, -0.13120346], dtype=float32)}


In [9]:
def average_vector(sentence, keyed_vectors=None):
    """Return the mean word vector of a sentence.

    The sentence is lowercased and split on whitespace; tokens that are not
    in the vocabulary are ignored.

    Parameters
    ----------
    sentence : str
        Raw sentence text.
    keyed_vectors : optional
        Word-vector lookup supporting ``word in kv``, ``kv[word]`` and
        ``kv.vector_size``. When omitted, the notebook-level ``model.wv``
        is used, which preserves the original call signature.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``vector_size``. All zeros when none of the
        sentence's tokens is in the vocabulary.
    """
    wv = model.wv if keyed_vectors is None else keyed_vectors

    words = sentence.lower().split()
    vectors = [wv[word] for word in words if word in wv]

    if not vectors:
        # The word vectors are float32 (see the outputs above); keep the
        # fallback in the same dtype so all results stack without upcasting.
        return np.zeros(wv.vector_size, dtype=np.float32)

    return np.mean(vectors, axis=0)

In [10]:
# Column-oriented records: each sentence paired with its averaged embedding.
data = {
    'sentence': sentences,
    'average_vector': list(map(average_vector, sentences)),
}

In [11]:
# Tabulate the sentences next to their averaged vectors.
df = pd.DataFrame(data=data)

In [12]:
# Header, blank line, then the plain-text rendering of the frame.
print("Sentence Representations:\n", df, sep="\n")

Sentence Representations:

                 sentence                            average_vector
0     I like this product  [0.087318726, -0.07673043, -0.062358618]
1   This product is great   [0.17735673, -0.029157469, -0.06646074]
2        I love this item   [-0.062621266, 0.041048422, 0.09703758]
3  This is a good product  [0.057195246, 0.062247343, -0.072647825]
