In [4]:
import gensim
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [2]:
sentences = [
    "I love machine learning and deep learning.",
    "Natural language processing is fascinating.",
    "Gensim is a great library for word embeddings.",
    "Word2Vec creates word embeddings based on context.",
    "Training word embeddings from scratch can be fun and educational."
]

# Preprocess the sentences (tokenize and lowercase)
processed_sentences = [simple_preprocess(sentence) for sentence in sentences]

# Train Word2Vec model
model = Word2Vec(sentences=processed_sentences, vector_size=100, window=5, min_count=1, workers=4, sg=1)

In [6]:
def visualize_embeddings(words):
    # Get word vectors for the specified words
    word_vectors = [model.wv[word] for word in words]

    # Reduce dimensions to 2D using PCA
    pca = PCA(n_components=2)
    word_vectors_2d = pca.fit_transform(word_vectors)

    # Plot words in 2D space
    plt.figure(figsize=(10, 7))
    for word, (x, y) in zip(words, word_vectors_2d):
        plt.scatter(x, y)
        plt.text(x + 0.05, y + 0.05, word, fontsize=12)

    plt.show()

In [7]:
words = ["machine", "learning", "deep", "natural", "language", "processing", "word2vec", "gensim"]

visualize_embeddings(words)

TypeError: 'Word2Vec' object is not subscriptable

In [3]:
word = "learning"
print(f"Vector for '{word}':")
print(model.wv[word])

# Find similar words
print("\nWords similar to 'learning':")
similar_words = model.wv.most_similar("learning", topn=5)
for word, similarity in similar_words:
    print(f"{word}: {similarity}")

# Word analogy (learning - fun + educational = ?)
print("\nSolving analogy: 'learning - fun + educational' = ?")
result = model.wv.most_similar(positive=['learning', 'educational'], negative=['fun'], topn=1)
print(result)


Vector for 'learning':
[ 9.4563962e-05  3.0773198e-03 -6.8126451e-03 -1.3754654e-03
  7.6685809e-03  7.3464094e-03 -3.6732971e-03  2.6427018e-03
 -8.3171297e-03  6.2054861e-03 -4.6373224e-03 -3.1641065e-03
  9.3113566e-03  8.7338570e-04  7.4907029e-03 -6.0740625e-03
  5.1605068e-03  9.9228229e-03 -8.4573915e-03 -5.1356913e-03
 -7.0648370e-03 -4.8626517e-03 -3.7785638e-03 -8.5361991e-03
  7.9556061e-03 -4.8439382e-03  8.4236134e-03  5.2625705e-03
 -6.5500261e-03  3.9578713e-03  5.4701497e-03 -7.4265362e-03
 -7.4057197e-03 -2.4752307e-03 -8.6257253e-03 -1.5815723e-03
 -4.0343284e-04  3.2996845e-03  1.4418805e-03 -8.8142155e-04
 -5.5940580e-03  1.7303658e-03 -8.9737179e-04  6.7936908e-03
  3.9735902e-03  4.5294715e-03  1.4343059e-03 -2.6998555e-03
 -4.3668128e-03 -1.0320747e-03  1.4370275e-03 -2.6460087e-03
 -7.0737829e-03 -7.8053069e-03 -9.1217868e-03 -5.9351693e-03
 -1.8474245e-03 -4.3238713e-03 -6.4606704e-03 -3.7173224e-03
  4.2891586e-03 -3.7390434e-03  8.3781751e-03  1.5339935e-03
 