#### Read model

In [None]:
# Base name of the model files; the ".vocab", ".center" and ".context"
# suffixes are appended to it when the files are opened below.
filename = "declaration-of-independence"

In [None]:
# Load the vocabulary: one entry per line, surrounding whitespace removed.
with open("{}.vocab".format(filename), "r") as fd_vocab:
    vocab = [raw_line.strip() for raw_line in fd_vocab]
# Peek at the vocabulary size and at the entry in the middle of the list.
len(vocab), vocab[len(vocab) // 2]

# Reverse mapping from entry to its position in vocab; when an entry occurs
# more than once, the last position wins.
lookup = {token: position for position, token in enumerate(vocab)}

In [None]:
# Every line of the .center file is split on whitespace: all fields but the
# last are parsed as the embedding components, the last one as the bias.
with open("{}.center".format(filename), "r") as fd_center:
    embedding = [raw_line.split() for raw_line in fd_center]
center_embedding = [[float(field) for field in fields[:-1]] for fields in embedding]
center_bias = [float(fields[-1]) for fields in embedding]
len(center_embedding), len(center_embedding[0]), len(center_bias)

In [None]:
# Every line of the .context file is split on whitespace: all fields but the
# last are parsed as the embedding components, the last one as the bias.
with open("{}.context".format(filename), "r") as fd_context:
    embedding = [raw_line.split() for raw_line in fd_context]
context_embedding = [[float(field) for field in fields[:-1]] for fields in embedding]
context_bias = [float(fields[-1]) for fields in embedding]
len(context_embedding), len(context_embedding[0]), len(context_bias)

#### Open analogy file

In [None]:
# Parse the analogy file into two parallel lists. A line whose first field is
# ":" opens a new category (its second field is the title); every other line
# is stored, as its list of fields, in the most recently opened category.
with open("question-words.txt", "r") as fd:
    all_elements = [raw_line.split() for raw_line in fd]
analogy_titles = []
analogies = []
for elements in all_elements:
    if elements[0] != ":":
        analogies[-1].append(elements)
        continue
    analogy_titles.append(elements[1])
    analogies.append([])
len(analogy_titles), len(analogies), len(analogies[0]), len(analogies[-1])

#### Begin analogy test

In [None]:
import math
def euclidean(w, v):
    """
    Return the Euclidean (L2) distance between w and v.

    Components are paired by position over the length of w, so v must be at
    least as long as w; any extra trailing components of v are ignored.
    """
    squared_total = 0.0
    for position, w_component in enumerate(w):
        delta = w_component - v[position]
        squared_total += delta ** 2
    return math.sqrt(squared_total)

def nn(w, embedding):
    """
    Nearest neighbor.

    Scan every row of embedding and find the one with the smallest Euclidean
    distance to w.

    Args:
        w: query vector.
        embedding: non-empty sequence of candidate vectors.

    Returns:
        A tuple (index, distance) for the closest row, where index counts
        from the start of embedding. The first row wins ties.

    Raises:
        IndexError: if embedding is an empty list.
    """
    min_index = 0
    min_distance = euclidean(w, embedding[0])
    # Row 0 is already the baseline, so resume at row 1 -- and number the rows
    # from 1 so the reported index refers to embedding itself rather than to
    # the sliced copy (numbering from 0 reported every match one row too low).
    for i, v in enumerate(embedding[1:], start=1):
        distance = euclidean(w, v)
        if distance < min_distance:
            min_index = i
            min_distance = distance
    return min_index, min_distance

# Sanity check: take the context vector of 'the' and find the closest row
# among the center embeddings.
the_index, the_distance = nn(
    context_embedding[lookup['the']],
    center_embedding,
)
vocab[the_index], the_distance

In [None]:
def test(embedding):
    """
    Run the word-analogy benchmark and print one accuracy line per category.

    For every analogy "a : b :: c : d" the query vector c - a + b is built
    from rows of embedding, its nearest neighbor is looked up with nn(), and
    the analogy counts as correct when that neighbor's word equals d.
    Analogies that cannot be evaluated (fewer than four words, a word missing
    from lookup, or no matching row in embedding) are skipped and therefore
    count as incorrect.

    Args:
        embedding: sequence of vectors indexed by the values of lookup; the
            three given words of each analogy are read from it.

    Relies on the module-level analogies, analogy_titles, lookup, vocab,
    center_embedding and nn().

    NOTE(review): candidates are always searched in center_embedding, even
    when a different embedding is passed in -- confirm this is intended.
    """
    for title, category in zip(analogy_titles, analogies):
        print("{}: ".format(title), end='')
        correct_count = 0
        for terms in category:
            # e.g. man : woman :: king : (queen)
            if len(terms) < 4:
                # Not a complete four-word analogy; nothing to evaluate.
                continue
            try:
                word0 = embedding[lookup[terms[0]]]
                word1 = embedding[lookup[terms[1]]]
                word2 = embedding[lookup[terms[2]]]
            except (KeyError, IndexError):
                # Out-of-vocabulary word, or no embedding row for it.
                continue
            # The vectors are plain lists, which do not support "-" / "+"
            # arithmetic, so combine them component by component.
            target = [c - a + b for a, b, c in zip(word0, word1, word2)]
            index, _ = nn(target, center_embedding)
            if vocab[index] == terms[3]:
                correct_count += 1
        # An empty category has no meaningful ratio; report 0.0 instead of
        # dividing by zero.
        accuracy = correct_count / len(category) if category else 0.0
        print("{}/{} = {}".format(correct_count, len(category), accuracy))

# Run the analogy benchmark, reading the analogy word vectors from the
# center embeddings.
test(center_embedding)