In [1]:
import gensim.downloader as api
import numpy as np
import numpy.linalg as norm
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
model = api.load("glove-wiki-gigaword-50")

In [3]:
documents = [
    "The king ruled the kingdom with justice and wisdom.",
    "A queen leads her people with grace and intelligence.",
    "I love eating pizza and pasta on weekends.",
    "The monarch made a new law to help farmers.",
    "My dog loves playing fetch in the park."
]

In [23]:
model['king']

array([ 0.50451 ,  0.68607 , -0.59517 , -0.022801,  0.60046 , -0.13498 ,
       -0.08813 ,  0.47377 , -0.61798 , -0.31012 , -0.076666,  1.493   ,
       -0.034189, -0.98173 ,  0.68229 ,  0.81722 , -0.51874 , -0.31503 ,
       -0.55809 ,  0.66421 ,  0.1961  , -0.13495 , -0.11476 , -0.30344 ,
        0.41177 , -2.223   , -1.0756  , -1.0783  , -0.34354 ,  0.33505 ,
        1.9927  , -0.04234 , -0.64319 ,  0.71125 ,  0.49159 ,  0.16754 ,
        0.34344 , -0.25663 , -0.8523  ,  0.1661  ,  0.40102 ,  1.1685  ,
       -1.0137  , -0.21585 , -0.15155 ,  0.78321 , -0.91241 , -1.6106  ,
       -0.64426 , -0.51042 ], dtype=float32)

In [24]:
doc_vectors = []

for doc in documents:
    words = doc.lower().split()
    valid_words = [word for word in words if word in model]
    
    if len(valid_words) == 0:
        vec = np.zeros(model.vector_size)
    else:
        word_vectors = [model[word] for word in valid_words]
        vec = np.mean(word_vectors, axis=0)
        
    doc_vectors.append(vec)

In [28]:
print("\nDocument Similarities:\n")

for i in range(len(documents)):
    for j in range(i+1, len(documents)):
        vec1 = doc_vectors[i].reshape(1, -1)
        vec2 = doc_vectors[j].reshape(1, -1)
        sim = cosine_similarity(vec1, vec2)[0][0]
        print(f"Similarity between \n Doc1 : {documents[i]} \n Doc2 : {documents[j]} \n -> {sim:.4f}")


Document Similarities:

Similarity between 
 Doc1 : The king ruled the kingdom with justice and wisdom. 
 Doc2 : A queen leads her people with grace and intelligence. 
 -> 0.8732
Similarity between 
 Doc1 : The king ruled the kingdom with justice and wisdom. 
 Doc2 : I love eating pizza and pasta on weekends. 
 -> 0.6785
Similarity between 
 Doc1 : The king ruled the kingdom with justice and wisdom. 
 Doc2 : The monarch made a new law to help farmers. 
 -> 0.9298
Similarity between 
 Doc1 : The king ruled the kingdom with justice and wisdom. 
 Doc2 : My dog loves playing fetch in the park. 
 -> 0.7681
Similarity between 
 Doc1 : A queen leads her people with grace and intelligence. 
 Doc2 : I love eating pizza and pasta on weekends. 
 -> 0.8130
Similarity between 
 Doc1 : A queen leads her people with grace and intelligence. 
 Doc2 : The monarch made a new law to help farmers. 
 -> 0.9042
Similarity between 
 Doc1 : A queen leads her people with grace and intelligence. 
 Doc2 : My dog