# Skip-Gram Model Demonstration

In [1]:
# Install the required libraries (only needed once per environment)
# Uncomment the line below to install them into the environment this kernel runs in
# %pip install numpy gensim matplotlib

In [2]:
## Importing necessary libraries
import numpy as np
from gensim.models import Word2Vec
import matplotlib.pyplot as plt

In [3]:
## Toy corpus
# Five short English sentences about cats and dogs, one string per document.
documents = [
    "The dog barks",
    "A dog is a pet",
    "Cats and dogs are popular pets",
    "The dog plays in the park",
    "The cat sleeps on the couch",
]

In [4]:
## Tokenizing the documents
# Lowercase each document, then split it on whitespace to get one token list per document
sentences = [text.lower().split() for text in documents]

In [5]:
## Training a Skip-Gram model
# sg=1 selects the Skip-Gram architecture (sg=0 would be CBOW).
# vector_size=10 -> 10-dimensional embeddings; window=2 -> up to 2 context words on each side;
# min_count=1 -> keep every word, since each one is rare in this tiny corpus.
# seed is pinned and training is limited to a single worker thread so that
# re-running the notebook reproduces the same vectors: with several workers,
# OS thread scheduling changes the order of updates between runs.
# (Across interpreter launches, gensim also documents PYTHONHASHSEED as a factor.)
model = Word2Vec(
    sentences,
    vector_size=10,
    window=2,
    min_count=1,
    sg=1,
    seed=1,
    workers=1,
)

In [6]:
## Inspecting the learned vectors for 'dog' and 'cat'
# Look both words up in the trained model's keyed vectors in a single pass
dog_vector, cat_vector = (model.wv[word] for word in ('dog', 'cat'))

# Show each embedding next to the word it belongs to
print("Vector for 'dog':", dog_vector)
print("Vector for 'cat':", cat_vector)

Vector for 'dog': [ 0.01521778 -0.02240374  0.04538573 -0.04843775  0.03956548 -0.04697673
  0.04319709 -0.01147669 -0.02494546 -0.01619863]
Vector for 'cat': [ 0.01664109 -0.01494748 -0.02252962 -0.0349847  -0.03235445  0.02764889
  0.04668408 -0.02254803 -0.01676486  0.04890791]


In [7]:
## Function to visualize word embeddings
def plot_word_vectors(model, dims=(0, 1)):
    """Scatter-plot two components of every word vector in a trained model.

    Each word in ``model.wv.key_to_index`` is drawn as one point whose
    coordinates are two raw components of its embedding, and the point is
    annotated with the word. No dimensionality reduction is applied, so the
    figure is a 2-D slice of the embedding space, not a projection of it.

    Args:
        model: Object exposing ``model.wv.key_to_index`` (iterable of words)
            and ``model.wv[word]`` (vector lookup), e.g. a trained gensim
            ``Word2Vec`` model.
        dims: Pair ``(x_index, y_index)`` of vector components to plot on
            the x and y axes. Defaults to ``(0, 1)``, the first two
            components.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    x_dim, y_dim = dims

    # Extract the words and stack their vectors into one array, built once
    # and reused for both axes.
    words = list(model.wv.key_to_index)
    coords = np.array([model.wv[word] for word in words])
    xs, ys = coords[:, x_dim], coords[:, y_dim]

    # Create a scatter plot on an explicit Axes rather than pyplot's implicit state
    _, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(xs, ys)

    # Annotate each point with the word
    for word, x, y in zip(words, xs, ys):
        ax.annotate(word, xy=(x, y))

    # Add title and labels; the labels name the component actually plotted
    ax.set_title('Word Vectors Visualization')
    ax.set_xlabel(f'Embedding dimension {x_dim}')
    ax.set_ylabel(f'Embedding dimension {y_dim}')
    ax.grid(True)
    plt.show()

In [8]:
## Plotting the trained embeddings
# Draw every vocabulary word of the trained model as a labelled point
plot_word_vectors(model=model)