In [3]:
import numpy as np 
import scipy.spatial

def read_embeddings(file_name='../data/glove.6B/glove.6B.100d.txt'):
    """Load word embeddings from a whitespace-separated text file.

    Every non-empty line is split on whitespace; the first token is taken
    as the word and the remaining tokens as the components of its vector.

    Args:
        file_name: Path of the embeddings file to read. Defaults to the
            glove.6B 100d file this notebook was written against.

    Returns:
        dict mapping each word (str) to a 1-D float32 numpy array.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a vector component cannot be parsed as a float.
    """
    embeddings = {}
    # The context manager closes the file even when a malformed line raises.
    with open(file_name, 'r', encoding='utf-8') as file:
        for line in file:
            values = line.split()
            if not values:
                # Blank line (e.g. stray newline at end of file): nothing to parse.
                continue
            embeddings[values[0]] = np.asarray(values[1:], dtype='float32')
    print('Read %s embeddings. ' % len(embeddings))
    return embeddings
def print_n_closest(embeddings,vec0,n):
    """Print the n words whose vectors have the smallest cosine distance to vec0.

    Args:
        embeddings: dict mapping word (str) to its vector.
        vec0: Query vector that every embedding is compared against.
        n: Number of closest words to print.

    Output is one line per word, formatted as ``word: distance`` with the
    distance shown to three decimals, in ascending order of distance.
    """
    # Collect (distance, word) pairs instead of a dict keyed by distance:
    # with a dict, two words at the same distance overwrite each other and
    # only one of them can ever be printed.
    scored = [
        (scipy.spatial.distance.cosine(vec1, vec0), word)
        for (word, vec1) in embeddings.items()
    ]
    # Sort on the distance alone; list.sort is stable, so tied words keep
    # the iteration order of `embeddings`.
    scored.sort(key=lambda pair: pair[0])
    for distance, word in scored[:n]:
        print(word + ': %6.3f' % distance)

In [4]:
# Load the embeddings once, then print the three closest words for a few
# sample query words.
embeddings = read_embeddings()
for lookop_word in ('hello', 'precisely', 'dog'):
    print('\nWords closest to ' + lookop_word)
    print_n_closest(embeddings,embeddings[lookop_word], 3)

Read 400000 embeddings. 

Words closest to hello
hello:  0.000
goodbye:  0.209
hey:  0.283

Words closest to precisely
precisely:  0.000
exactly:  0.147
accurately:  0.293

Words closest to dog
dog:  0.000
cat:  0.120
dogs:  0.166


In [7]:

# Three closest words to the embedding of 'king'.
lookop_word = 'king'
print('\nWords closest to ', lookop_word, sep='')
print_n_closest(embeddings, vec0=embeddings[lookop_word], n=3)


Words closest to king
king:  0.000
prince:  0.232
queen:  0.249


In [6]:
# Vector arithmetic: take 'king', subtract 'man', then add 'woman', and
# list the three words closest to the resulting vector.
lookop_word = '(king - man + woman)'
vec = embeddings['king'] - embeddings['man']
vec = vec + embeddings['woman']
print('\nWords closest to ', lookop_word, sep='')
print_n_closest(embeddings, vec0=vec, n=3)


Words closest to (king - man + woman)
king:  0.145
queen:  0.217
monarch:  0.307


In [8]:

# Three closest words to the embedding of 'sweden'.
lookop_word = 'sweden'
print('\nWords closest to ', lookop_word, sep='')
print_n_closest(embeddings, vec0=embeddings[lookop_word], n=3)


Words closest to sweden
sweden:  0.000
denmark:  0.138
norway:  0.193


In [9]:

# Three closest words to the embedding of 'madrid'.
lookop_word = 'madrid'
print('\nWords closest to ', lookop_word, sep='')
print_n_closest(embeddings, vec0=embeddings[lookop_word], n=3)


Words closest to madrid
madrid:  0.000
barcelona:  0.157
valencia:  0.197


In [10]:
# Vector arithmetic: take 'madrid', subtract 'spain', then add 'sweden', and
# list the three words closest to the resulting vector.
lookop_word = '(madrid - spain + sweden)'
vec = embeddings['madrid'] - embeddings['spain']
vec = vec + embeddings['sweden']
print('\nWords closest to ', lookop_word, sep='')
print_n_closest(embeddings, vec0=vec, n=3)


Words closest to (madrid - spain + sweden)
stockholm:  0.271
sweden:  0.300
copenhagen:  0.305
