<a href="https://colab.research.google.com/github/A190nux/Word2Vec/blob/main/word2vec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gensim



In [2]:
import numpy as np
import gensim.downloader as api

print("Available Models:")
print(api.info()['models'].keys())

Available Models:
dict_keys(['fasttext-wiki-news-subwords-300', 'conceptnet-numberbatch-17-06-300', 'word2vec-ruscorpora-300', 'word2vec-google-news-300', 'glove-wiki-gigaword-50', 'glove-wiki-gigaword-100', 'glove-wiki-gigaword-200', 'glove-wiki-gigaword-300', 'glove-twitter-25', 'glove-twitter-50', 'glove-twitter-100', 'glove-twitter-200', '__testing_word2vec-matrix-synopsis'])


In [3]:
model = api.load("glove-wiki-gigaword-100")

In [24]:
def cbow_predict(context_words, model, topn=5):
    """
    Predict target word given context words using CBOW approach with model.most_similar
    """
    valid_words = [word for word in context_words if word in model]
    if not valid_words:
        return []

    context_vectors = []
    valid_words = []
    for word in context_words:
        if word in model:
            context_vectors.append(model[word])
            valid_words.append(word)

    if not context_vectors:
        return []

    vector = np.mean(context_vectors, axis=0)

    return model.most_similar(positive=vector, topn=topn)



def skipgram_predict(target_word, model, topn=5):
    """
    Predict context words given a target word using Skip-gram approach with model.most_similar
    """
    if target_word not in model:
        return []

    vector = []
    if target_word in model:
        vector = model[target_word]


    return model.most_similar(vector, topn=topn)

In [25]:
target_word = "cat"
predictions = skipgram_predict(target_word, model)
print(f"Skip-gram predictions for target word '{target_word}': {predictions}")

Skip-gram predictions for target word 'cat': [('cat', 0.9999998807907104), ('dog', 0.8798074722290039), ('rabbit', 0.7424427270889282), ('cats', 0.732300341129303), ('monkey', 0.7288710474967957)]


In [26]:
target_word = "king"
predictions = skipgram_predict(target_word, model)
print(f"Skip-gram predictions for target word '{target_word}': {predictions}")

Skip-gram predictions for target word 'king': [('king', 1.0), ('prince', 0.7682328820228577), ('queen', 0.7507690787315369), ('son', 0.7020888328552246), ('brother', 0.6985775232315063)]


In [27]:
context_words = ["the", "cat", "on", "the", "mat"]
predictions = cbow_predict(context_words, model, topn = 1)
print(f"CBOW predictions for context words '{context_words}': {predictions}")

CBOW predictions for context words '['the', 'cat', 'on', 'the', 'mat']': [('the', 0.9126800894737244)]
