In [1]:
# numpy imports
import numpy as np
from numpy import dot
from numpy.linalg import norm

from gensim.models import Word2Vec
from gensim.test.utils import common_texts, get_tmpfile
import gensim.downloader as api

In [2]:
model = api.load("glove-twitter-50")

In [3]:
def get_vectors(word1, word2):
    try:
        vec1 = model[word1]/norm(model[word1])
    except:
        print('Word1 is not present in the model')
        
    try:
        vec2 = model[word2]/norm(model[word2])
    except:
        print('Word2 is not present in the model')
        
    return vec1, vec2

In [4]:
def cosine_similarity(word1, word2):
    try:
        v1, v2 = get_vectors(word1, word2)
    except:
        return
    cos_sim = dot(v1, v2)/(norm(v1)*norm(v2))
    return cos_sim

In [9]:
def euclidean_distance(word1, word2):
    try:
        v1, v2 = get_vectors(word1, word2)
    except:
        return
    dist = 1/(1 + norm(v1 - v2))
    return dist

In [11]:
word1 = 'ass'
word2 = 'donkey'

print(cosine_similarity(word1, word2))
print(euclidean_distance(word1, word2))

0.64861095
0.5439753343093442


word1     word2     Cosine       Euclidean

War       Spear     0.24926732   0.25380212
Cat       Dog       0.9429585    0.06486539

In [30]:
 arr = [["restricted", "nature", "aspect"],
["character", "necessary", "mandatory"],
["requisite", "negate", "contradict"],
["refute", "negligent", "careless"],
["remiss", "negotiate", "bargain"],
["deal", "nice", "affable"],
["benign", "noble", "aristocratic"],
["distinguished", "novice", "beginner"],
["nonprofessional", "nuisance", "annoyance"],
["loyal", "objection", "disapproval"],
["protest", "obligatory", "compulsory"],
["required", "observe", "notice"],
["watch", "obvious", "conspicuous"],
["definite", "offend", "anger"],
["irritate", "offer", "bid"],
["proposal", "omen", "premonition"],
["sign", "omit", "exclude"],
["remove", "opportune", "advantageous"],
["placate", "pain", "ache"],
["discomfort", "paramount", "chief"],
["leading", "partisan", "biased"],
["dogmatic", "passive", "inactive"],
["lethargic", "pause", "break"],
["cease", "permeate", "diffuse"],
["disseminate", "perpetuate", "endure"],
["preserve", "perplex", "astonish"],
["baffle", "persecute", "afflict"],
["emanate", "radical", "basic"],
["fundamental", "range", "anger"],
["furor", "rank", "arrange"],
["classify", "realize", "accomplish"],
["fulfill", "recalcitrant", "obstinate"],
["stubborn", "receptacle", "container"],
["repository", "reconcile", "atone"],
["conciliate", "regret", "deplore"],
["grieve", "reliable", "dependable"],
["permit", "scope", "aim"],
["extent", "section", "division"],
["portion", "settle", "adjust"],
["compromise", "shallow", "superficial"],
["trivial", "shrewd", "careful"],
["calculating", "significant", "distinctive"],
["important", "slight", "delicate"],
["slender", "spontaneous", "impromptu"],
["unplanned", "spread", "announce"],
["broadcast", "stabilize", "balance"],
["subdue", "tangle", "intertwine"],
["twist", "temper", "mood"],
["nature", "tendency", "inclination"],
["trend", "term", "cycle"],
["duration", "thrift", "conservation"],
["prudence", "tough", "aggressive"],
["unyielding", "transfer", "convey"],
["exchange", "tumult", "agitation"],
["commotion", "turbulent", "disordered"],
["inflated", "valid", "authorized"],
["legitimate", "variety", "assortment"],
["diversify", "verify", "authenticate"]]

In [31]:
cs = [1, 1, 1]
ed = [1, 1, 1]
for i in range(len(arr)):
    cs[0] = [arr[i][0], arr[i][1], cosine_similarity(arr[i][0], arr[i][1])]
    ed[0] = [arr[i][0], arr[i][1], euclidean_distance(arr[i][0], arr[i][1])]
    cs[1] = [arr[i][1], arr[i][2], cosine_similarity(arr[i][1], arr[i][2])]
    ed[1] = [arr[i][1], arr[i][2], euclidean_distance(arr[i][1], arr[i][2])]
    cs[2] = [arr[i][0], arr[i][2], cosine_similarity(arr[i][0], arr[i][2])]
    ed[2] = [arr[i][0], arr[i][2], euclidean_distance(arr[i][0], arr[i][2])]
    try:
        for j in range(3):
            if cs[j][2] < 0.3:
                print(cs[j][0], '\t\t', cs[j][1], '\t\tCosine Similarity = ', cs[j][2], sep = '')
    except:
        pass
    
for i in range(len(arr)):
    cs[0] = [arr[i][0], arr[i][1], cosine_similarity(arr[i][0], arr[i][1])]
    ed[0] = [arr[i][0], arr[i][1], euclidean_distance(arr[i][0], arr[i][1])]
    cs[1] = [arr[i][1], arr[i][2], cosine_similarity(arr[i][1], arr[i][2])]
    ed[1] = [arr[i][1], arr[i][2], euclidean_distance(arr[i][1], arr[i][2])]
    cs[2] = [arr[i][0], arr[i][2], cosine_similarity(arr[i][0], arr[i][2])]
    ed[2] = [arr[i][0], arr[i][2], euclidean_distance(arr[i][0], arr[i][2])]
    try:
        for j in range(3):
            if ed[j][2] < 0.5:
                print(ed[j][0], '\t\t', ed[j][1], '\t\tEuclidean Similarity = ', ed[j][2], sep = '')

    except:
        pass

restricted		nature		Cosine Similarity = 0.23243318
requisite		contradict		Cosine Similarity = 0.20010321
refute		careless		Cosine Similarity = 0.28112528
remiss		bargain		Cosine Similarity = 0.18323237
nice		affable		Cosine Similarity = -0.038948134
deal		affable		Cosine Similarity = -0.07305119
benign		noble		Cosine Similarity = 0.23531143
noble		aristocratic		Cosine Similarity = 0.14214388
Word1 is not present in the model
Word1 is not present in the model
Word1 is not present in the model
Word1 is not present in the model
loyal		objection		Cosine Similarity = 0.09456172
loyal		disapproval		Cosine Similarity = 0.0967596
required		observe		Cosine Similarity = 0.24575338
obvious		conspicuous		Cosine Similarity = 0.29732957
watch		conspicuous		Cosine Similarity = -0.11826063
irritate		offer		Cosine Similarity = 0.19192529
irritate		bid		Cosine Similarity = -0.024354054
proposal		omen		Cosine Similarity = 0.22228998
proposal		premonition		Cosine Similarity = 0.20883064
sign		omit		Cosine

In [20]:
print(ed)

[['isolate', 'detach', 0.528712630784337], ['detach', 'quarantine', 0.5033920995611961], ['isolate', 'quarantine', 0.4680969671713597]]
