Word embeddings provide a dense representation of words and their relative meanings. They are an improvement over the sparse representations used in simpler bag-of-words models. Word embeddings can be learned from text data and reused across projects. They can also be learned as part of fitting a neural network on text data.

In [1]:
#Importing Necessary Libraries for Tensorflow Hub
import numpy as np
import tensorflow_hub as hub

Text embedding based on skipgram version of word2vec with 1 out-of-vocabulary bucket. Maps from text to 250-dimensional embedding vectors.

In [2]:
# Load the pretrained Wiki-words-250 text-embedding module from TensorFlow Hub.
# The returned object is called with a list of strings in the cells that follow.
embedding = hub.load("https://tfhub.dev/google/Wiki-words-250/2")

In [3]:

# Embed the four words of the king/queen analogy, one single-element batch
# per call: a = king, b = man, c = woman, d = queen.
# NOTE(review): the literals are padded with a leading and trailing space;
# they are reproduced exactly so the recorded outputs remain valid --
# confirm the padding is intentional.
a, b, c, d = (
    embedding([word]) for word in (" king ", " man ", " woman ", " queen ")
)


In [4]:
# Analogy vector: vector(king) - vector(man) + vector(woman).
# It is printed wrapped in a list, exactly as in the original cell.
X = a - b + c
print("King-Man+Woman:", [X])

King-Man+Woman: [<tf.Tensor: shape=(1, 250), dtype=float32, numpy=
array([[-0.12994584, -0.17411731,  0.08241335, -0.09460426, -0.02963368,
         0.00833852, -0.01580581, -0.07200217, -0.0374763 , -0.01857486,
         0.04575163, -0.06350955,  0.0310036 ,  0.04439414,  0.06356907,
        -0.02856771,  0.05558529,  0.07249814, -0.02398837,  0.0455301 ,
        -0.07038731,  0.00989324, -0.05211002, -0.02339969, -0.10386609,
        -0.00747646, -0.0174827 , -0.15109651, -0.16803886, -0.03159868,
        -0.04374632,  0.0941674 ,  0.05635576, -0.11544351, -0.01706913,
        -0.0181496 ,  0.05560432,  0.01284544, -0.09410097,  0.08030569,
        -0.13458134, -0.12798919, -0.14993384,  0.06665589,  0.07577673,
        -0.12577374, -0.06869593, -0.10468247, -0.01054525, -0.07139292,
        -0.00569897, -0.02220746, -0.11851092, -0.09434773,  0.06391967,
         0.04567089,  0.14339279, -0.13938236, -0.03750862,  0.03229091,
         0.07670465,  0.05507568, -0.01978571,  0.0488369

In [5]:
# Second combination: vector(woman) - vector(king) + vector(queen).
# NOTE(review): if king - man ~ queen - woman holds, the estimate for "man"
# is king - queen + woman; the expression below is woman - king + queen,
# which is a different vector. It is left unchanged so the recorded outputs
# and the printed labels stay consistent -- confirm which one was intended.
Y = c - a + d
print("Woman-King+Queen:", [Y])

Woman-King+Queen: [<tf.Tensor: shape=(1, 250), dtype=float32, numpy=
array([[ 0.02118351, -0.12248733, -0.06041376,  0.07144362, -0.10978897,
         0.07119545,  0.04992773, -0.02743515, -0.06480911, -0.20285434,
         0.11473861,  0.02721604,  0.05260982,  0.1065385 ,  0.01394302,
        -0.1647489 ,  0.07203833,  0.11857994, -0.00095271, -0.09147857,
         0.03879085,  0.02359177,  0.04185368, -0.04939352, -0.07421876,
         0.03422938,  0.08419857,  0.09070219,  0.00358181,  0.1773604 ,
         0.1088568 ,  0.08282477,  0.12156986, -0.1598848 , -0.05240928,
        -0.13726465,  0.0233911 ,  0.00856919,  0.09519659, -0.03324209,
        -0.02363841, -0.17106755, -0.08100536, -0.05365514,  0.08478432,
        -0.0671075 , -0.1854074 ,  0.07032573, -0.07756672, -0.08777828,
         0.08333436,  0.0141644 , -0.09988222,  0.08947498,  0.01964116,
         0.01092771,  0.01083473, -0.01248896,  0.08680015,  0.01968326,
        -0.10005222, -0.08237934, -0.13430107,  0.08316

In [6]:
# Two difference vectors, kept so that (king - man) can be compared
# with (queen - woman).
Z = a - b  # king - man
U = d - c  # queen - woman

In [7]:
# Cosine similarity between the analogy vectors and the plain word vectors.
from sklearn.metrics.pairwise import cosine_similarity

# (label, left vector, right vector), listed in the order they are printed.
comparisons = [
    ("King-Man+Woman and Queen", X, d),
    ("Woman-King+Queen and Man", Y, b),
    ("King-Man and Queen-Woman", Z, U),
    ("Man and Woman", b, c),
    ("King and Queen", a, d),
    ("Man and King", a, b),
    ("Woman and Queen", c, d),
]

for label, lhs, rhs in comparisons:
    print(f"Similarity between {label}:", cosine_similarity(lhs, rhs))

Similarity between King-Man+Woman and Queen: [[0.763873]]
Similarity between Woman-King+Queen and Man: [[0.63743]]
Similarity between King-Man and Queen-Woman: [[0.7635017]]
Similarity between Man and Woman: [[0.8381338]]
Similarity between King and Queen: [[0.71021986]]
Similarity between Man and King: [[0.36127636]]
Similarity between Woman and Queen: [[0.5902161]]


Find the top 5 closest words to the word "dog"

In [8]:
# Embed the query word "dog"; e is the vector the candidate words
# are compared against.
e = embedding(["dog"])

In [9]:
# Embed the candidate words to compare with "dog", one single-element batch
# per call: f = doggy, g = hound, h = pup, i = canine, j = puppy, k = pet.
f, g, h, i, j, k = (
    embedding([word])
    for word in ("doggy", "hound", "pup", "canine", "puppy", "pet")
)

In [10]:
# Print the cosine similarity between "dog" (e) and each candidate vector.
# Results appear in the order listed here; nothing is sorted or ranked.
candidates = [
    ("Pup", h),
    ("Doggy", f),
    ("Canine", i),
    ("Puppy", j),
    ("Hound", g),
    ("Pet", k),
]

for name, vector in candidates:
    print(f"Similarity between Dog and {name}:", cosine_similarity(e, vector))

Similarity between Dog and Pup: [[0.6987275]]
Similarity between Dog and Doggy: [[0.6388842]]
Similarity between Dog and Canine: [[0.6049171]]
Similarity between Dog and Puppy: [[0.8488173]]
Similarity between Dog and Hound: [[0.78350145]]
Similarity between Dog and Pet: [[0.8255564]]


In [43]:
# Further comparisons against "dog" (e): two-word phrases and related nouns.
# NOTE(review): "bull dog", "sheep dog" and "pet dog" are passed as two-word
# strings. The module documentation describes splitting input on spaces, so
# these scores presumably reflect both words (including "dog" itself) rather
# than a single compound word -- confirm before interpreting them.
l = embedding(["bull dog"])
print("Similarity between Dog and Bulldog:", cosine_similarity(e, l))
m = embedding(["sheep dog"])
print("Similarity between Dog and Sheepdog:", cosine_similarity(e, m))
n = embedding(["cat"])
print("Similarity between Dog and cat:", cosine_similarity(e, n))
# The next two labels used to be copy-pasted as "Dog and cat"; they now name
# the text that is actually embedded.
o = embedding(["kennel"])
print("Similarity between Dog and Kennel:", cosine_similarity(e, o))
# o is rebound here, so after this cell it holds the "pet dog" vector.
o = embedding(["pet dog"])
print("Similarity between Dog and Pet dog:", cosine_similarity(e, o))

Similarity between Dog and Bulldog: [[0.86425537]]
Similarity between Dog and Sheepdog: [[0.88398373]]
Similarity between Dog and cat: [[0.8490828]]
Similarity between Dog and cat: [[0.7129229]]
Similarity between Dog and cat: [[0.95539427]]
