In [8]:
import numpy as np
import gensim.downloader as api

In [9]:
# Identifier handed to gensim's downloader; the name suggests 100-dimensional
# GloVe vectors trained on Wikipedia + Gigaword — TODO confirm against the
# gensim-data catalogue.
glove_model_name = "glove-wiki-gigaword-100"

# Announce the load before and after, since fetching the model can be slow.
print("Loading GloVe model (this may take a few seconds)...")
glove_model = api.load(glove_model_name)
print("GloVe model loaded.")


Loading GloVe model (this may take a few seconds)...
GloVe model loaded.


In [16]:
# Candidate vocabulary to look up in the embedding model.
words = [
    'apple', 'apples', 'pear', 'berry', 'peas',
    'strawberry', 'peach', 'potato', 'grape',
]

# Partition the candidates by membership in the loaded model: words it knows
# map to their embedding, the rest are collected so they can be reported.
vectors = {w: glove_model[w] for w in words if w in glove_model}
missing_words = [w for w in words if w not in glove_model]

if len(missing_words) > 0:
    print("Words not found in GloVe:", missing_words)


In [17]:
def cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    Args:
        v1: First vector; anything ``np.dot`` and ``np.linalg.norm`` accept.
            1-D vectors of equal length are assumed.
        v2: Second vector, same expectations as ``v1``.

    Returns:
        ``np.dot(v1, v2) / (norm(v1) * norm(v2))``, or ``0.0`` when either
        vector has zero norm (the ratio would otherwise divide by zero).
    """
    # Each norm is needed by both the zero guard and the final ratio, so
    # compute it once instead of once per use.
    norm1 = np.linalg.norm(v1)
    norm2 = np.linalg.norm(v2)
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return np.dot(v1, v2) / (norm1 * norm2)

In [39]:
# Query word that every other entry in `vectors` is compared against.
q = "pear"
q_vec = vectors[q]  # raises KeyError if `q` has no entry in `vectors`

# Build (word, score) pairs for every entry except the query itself.
similarities = [
    (candidate, cosine_similarity(q_vec, candidate_vec))
    for candidate, candidate_vec in vectors.items()
    if candidate != q
]

In [40]:
top = 3  # number of best matches to report

# Reorder the (word, score) pairs in place, highest score first.
similarities[:] = sorted(similarities, key=lambda pair: pair[1], reverse=True)

print(f"\nTop {top} most similar words to '{q}':")
best_matches = similarities[:top]
for i, (word, score) in enumerate(best_matches, start=1):
    print(f"{i}. {word} — Similarity: {score:.4f}")


Top 3 most similar words to 'pear':
1. peach — Similarity: 0.7705
2. apples — Similarity: 0.6245
3. apple — Similarity: 0.5890
