In [None]:
# Using pre-trained GloVe embeddings to find similar words.

In [None]:
# Necessary steps before you run the code

- Go to the GloVe project page (https://nlp.stanford.edu/projects/glove/).
- Download the file glove.6B.zip. It contains pre-trained word embeddings in several dimensions (50d, 100d, 200d, and 300d).
- Unzip the file to see glove.6B.50d.txt.
- Place glove.6B.50d.txt in the same directory as your code file, or specify the correct path in the code if it's located elsewhere.

In [4]:
# Import necessary libraries
import numpy as np  # For numerical operations
from sklearn.metrics.pairwise import cosine_similarity  # For similarity calculation

# Load GloVe embeddings and list the words most similar to a query word.
# Settings a reader might tune: the embedding file, the query word, and how many neighbours to print.
GLOVE_PATH = 'glove.6B.50d.txt'  # Ensure this file is in the working directory or provide the correct path
QUERY_WORD = "scholar"
TOP_N = 10

embeddings_index = {}  # Dictionary mapping each word to its embedding vector (float32 array)

# Each non-empty line has the form "<word> <v1> <v2> ..."; parse it into the dictionary
with open(GLOVE_PATH, encoding='utf-8') as f:
    for line in f:
        values = line.split()  # Split each line into components
        if not values:
            continue  # Skip blank lines instead of failing on values[0]
        # First item is the word, remaining items are the vector values
        embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

# Defined up front so these names exist even when the query word is missing
similar_words = {}  # Dictionary of word -> similarity score
sorted_similar_words = []  # (word, similarity) pairs, most similar first

# Only compute similarities when the query word is in the vocabulary; previously a
# missing word left `scholar_vector` undefined and the loop below raised NameError.
if QUERY_WORD in embeddings_index:
    scholar_vector = embeddings_index[QUERY_WORD]  # Get the vector for the query word

    # Stack all vectors into one (n_words, dim) matrix so cosine_similarity is called
    # once for the whole vocabulary instead of once per word.
    vocabulary = list(embeddings_index)
    embedding_matrix = np.vstack([embeddings_index[token] for token in vocabulary])
    similarities = cosine_similarity([scholar_vector], embedding_matrix)[0]
    similar_words = dict(zip(vocabulary, similarities))

    # Sort words by similarity score in descending order
    sorted_similar_words = sorted(similar_words.items(), key=lambda x: x[1], reverse=True)

    # Print the top words similar to the query word
    print(f"Top words similar to '{QUERY_WORD}':")
    # Exclude the query word by name rather than assuming it is the first entry
    neighbours = [(word, similarity) for word, similarity in sorted_similar_words if word != QUERY_WORD]
    for word, similarity in neighbours[:TOP_N]:
        print(f"{word}: {similarity}")
else:
    print(f"'{QUERY_WORD}' is not in the GloVe vocabulary; nothing to compare.")

Top words similar to 'scholar':
historian: 0.8646411895751953
philosopher: 0.8056567311286926
poet: 0.7976035475730896
author: 0.7962594032287598
professor: 0.7924675345420837
eminent: 0.7883749008178711
literature: 0.7770242094993591
theologian: 0.7625619173049927
sociologist: 0.7605342864990234
linguist: 0.7595699429512024
