In [4]:
from collections import defaultdict
from operator import itemgetter

import numpy as np


In [5]:
def LoadEmbedding(fname):
    """Return the lines of the text file ``fname`` with surrounding whitespace removed.

    Parameters
    ----------
    fname : path of the file to read (opened in text mode).

    Returns
    -------
    list of str, one entry per line of the file, each passed through
    ``str.strip``. Blank lines are kept as empty strings.
    """
    with open(fname, 'r') as handle:
        return [raw.strip() for raw in handle]


def LoadEdges(fname):
    """Return the lines of the text file ``fname`` with surrounding whitespace removed.

    Parameters
    ----------
    fname : path of the file to read (opened in text mode).

    Returns
    -------
    list of str, one entry per line of the file, each passed through
    ``str.strip``. Blank lines are kept as empty strings.
    """
    with open(fname, 'r') as handle:
        return [raw.strip() for raw in handle]


def LoadMappings(fname):
    """Return the lines of the text file ``fname`` with surrounding whitespace removed.

    Parameters
    ----------
    fname : path of the file to read (opened in text mode).

    Returns
    -------
    list of str, one entry per line of the file, each passed through
    ``str.strip``. Blank lines are kept as empty strings.
    """
    with open(fname, 'r') as handle:
        return [raw.strip() for raw in handle]


In [6]:
# Hyperbolic distance between two points

def dist(u, v):
    """Hyperbolic (Poincare-ball) distance between two points.

    Evaluates ``arccosh(1 + 2*||u - v||^2 / ((1 - ||u||^2) * (1 - ||v||^2)))``.

    Parameters
    ----------
    u, v : numpy arrays that can be subtracted from one another. They are
        expected to lie in the closed unit ball; norms greater than 1 are
        not validated.

    Returns
    -------
    numpy float
        ``0.0`` when ``u`` and ``v`` coincide, ``inf`` when the points differ
        and at least one of them has norm exactly 1 (a boundary point is
        infinitely far from every other point), otherwise the value of the
        formula above.
    """
    sqGap = np.linalg.norm(u - v) ** 2
    if sqGap == 0:
        # Identical inputs: the distance is zero regardless of where they sit.
        return np.float64(0.0)
    nu = np.linalg.norm(u)
    nv = np.linalg.norm(v)
    if nu == 1 or nv == 1:
        # The denominator would be zero. Previously this case fell through to
        # arccosh(1) == 0, which made boundary points look like perfect
        # matches; the correct limit is an infinite distance.
        return np.float64(np.inf)
    return np.arccosh(1. + 2 * sqGap / ((1 - nu ** 2) * (1 - nv ** 2)))


In [7]:
def ProcessEmbeddings(emb, dim):
    """Parse embedding lines into per-index vectors plus the trailing tau value.

    The first element of ``emb`` is skipped. Every remaining element is split
    on ``','``: the first field is parsed as an integer index, the last field
    as a float (tau), and the fields in between as the float64 vector.

    Parameters
    ----------
    emb : sequence of str, as returned by ``LoadEmbedding``.
    dim : expected number of vector components on every line.

    Returns
    -------
    (dict, float)
        Mapping ``np.int64`` index -> float64 array of length ``dim``, and the
        tau parsed from the last processed line (``0.0`` if there was none).

    Raises
    ------
    AssertionError
        If a line does not carry exactly ``dim`` vector components.
    """
    vectors = {}
    tau = 0.0
    for record in emb[1:]:
        fields = record.split(',')
        rowTau = np.float64(fields[-1])
        key = np.int64(fields[0])
        coords = np.asarray(fields[1:-1], dtype=np.float64)
        vectors[key] = coords
        assert coords.shape[0] == dim
        # Only the tau of the most recently parsed line is kept.
        tau = rowTau
    return vectors, tau


def BuildWMatrix(embDict, dim):
    """Stack the vectors of ``embDict`` into a dense matrix, one row per key.

    Parameters
    ----------
    embDict : mapping from integer index to a length-``dim`` vector. Each key
        is used directly as a row index, so keys must be valid indices for an
        array with ``len(embDict)`` rows.
    dim : number of columns of the matrix.

    Returns
    -------
    (numpy.ndarray, int)
        The float64 matrix of shape ``(len(embDict), dim)`` and its row count.
    """
    rowCount = len(embDict)
    matrix = np.zeros(shape=(rowCount, dim), dtype=np.float64)
    for row, vector in embDict.items():
        matrix[row, :] = vector
    return matrix, rowCount



In [8]:
def BuildEdgeDict(fname):
    """Build an adjacency mapping from the edge file ``fname``.

    Each line returned by ``LoadEdges`` must consist of exactly two fields
    separated by a single space; both are parsed as ``np.int64``.

    Returns
    -------
    collections.defaultdict(list)
        Maps the first field of each line to the list of second fields seen
        with it, in file order.
    """
    adjacency = defaultdict(list)
    for record in LoadEdges(fname):
        src, dst = record.split(' ')
        adjacency[np.int64(src)].append(np.int64(dst))
    return adjacency


def BuildMappingDict(fname):
    """Build name -> id and id -> name lookups from the mapping file ``fname``.

    Each line returned by ``LoadMappings`` must consist of exactly two fields
    separated by a single space: a name and an integer id.

    Returns
    -------
    (dict, dict)
        First dict maps name (str) to id (``np.int64``); second dict is the
        reverse lookup from id to name.
    """
    nameToId, idToName = {}, {}
    for record in LoadMappings(fname):
        name, rawId = record.split(' ')
        nodeId = np.int64(rawId)
        nameToId[name] = nodeId
        idToName[nodeId] = name
    return nameToId, idToName



In [9]:
def build(fedge, fmap, femb, dim, key):
    """Report the embedding neighbours and graph children of ``key``.

    Reads the embedding lines from the file ``femb`` and hands them to
    ``buildWithEmb``, which does all of the loading, printing and ranking.
    This replaces a line-for-line copy of that function's body.

    Parameters
    ----------
    fedge : path of the edge file.
    fmap : path of the name/id mapping file.
    femb : path of the embedding file.
    dim : number of vector components expected per embedding line.
    key : name of the node to query; must be present in the mapping file.

    Returns
    -------
    list of str
        Names of the closest nodes, nearest first, as returned by
        ``buildWithEmb``.
    """
    return buildWithEmb(fedge, fmap, LoadEmbedding(femb), dim, key)


def buildWithEmb(fedge, fmap, emb, dim, key, topn=13):
    """Report the embedding neighbours and graph children of ``key``.

    Prints tau, then the ``topn`` nodes closest to ``key`` under
    ``dist(...) / tau``, then the nodes listed for ``key`` in the edge file.

    Parameters
    ----------
    fedge : path of the edge file (read with ``BuildEdgeDict``).
    fmap : path of the name/id mapping file (read with ``BuildMappingDict``).
    emb : embedding lines, as accepted by ``ProcessEmbeddings``.
    dim : number of vector components expected per embedding line.
    key : name of the node to query; must be present in the mapping file.
    topn : how many nearest neighbours to print and return (default 13, the
        value that used to be hard-coded).

    Returns
    -------
    list of str
        Names of the ``topn`` closest nodes, nearest first.

    Raises
    ------
    KeyError
        If ``key`` is not in the mapping, or has no embedding.
    """
    edgeDict = BuildEdgeDict(fedge)
    mappingDict, revIndex = BuildMappingDict(fmap)
    embDict, tau = ProcessEmbeddings(emb, dim)
    print(f"tau={tau}")
    W, _ = BuildWMatrix(embDict, dim)

    word = key
    wid = mappingDict[word]
    # .get() reads the children without inserting an empty list for leaf nodes.
    wrels = edgeDict.get(wid, [])
    wvec = embDict[wid]

    # Exclude only the query node itself. Comparing vectors instead (as was
    # done before) also dropped distinct nodes whose embedding happens to
    # coincide with the query's, hiding its closest possible neighbours.
    d = [
        (idx, dist(wvec, W[idx, :]) / tau)
        for idx in range(W.shape[0])
        if idx != wid
    ]

    print()
    print()
    print(f"Relations for {word} in embedding using distance metric")
    bestMatches = []
    for k, v in sorted(d, key=itemgetter(1))[:topn]:
        print(f"{revIndex[k]} {v}")
        bestMatches.append(revIndex[k])
    print()
    print()
    print(f"Children of {word} in graph")
    for wrel in wrels:
        print(f"\t{revIndex[wrel]}")
    return bestMatches

In [126]:
# Query 'interact.v.01' in the small-wordnet graph with the dim-10 embedding.
build(
    fedge='data/edges/small-wordnet.edges',
    fmap='data/mappings/small-wordnet.mapping',
    femb='data/emb/smlwnr10.emb',
    dim=10,
    key='interact.v.01',
)

tau=2.1994093228131533


Relations for interact.v.01 in embedding using distance metric
patronize.v.03 0.9999999965586915
treat.v.01 0.9999999966782599
transact.v.01 0.9999999973199245
socialize.v.01 0.9999999974391658
intervene.v.01 0.9999999979665186
meet.v.07 0.9999999984989781
get_in_touch.v.01 0.999999998631977
consort.v.01 0.9999999986358267
communicate.v.02 0.9999999991339558
*root* 1.000000000008325
get_around_to.v.01 1.1521209005443203
go.v.02 1.1665013095787333
condescend.v.02 1.2172611543929448


Children of interact.v.01 in graph
	communicate.v.02
	consort.v.01
	get_in_touch.v.01
	intervene.v.01
	meet.v.07
	patronize.v.03
	socialize.v.01
	transact.v.01
	treat.v.01


['patronize.v.03',
 'treat.v.01',
 'transact.v.01',
 'socialize.v.01',
 'intervene.v.01',
 'meet.v.07',
 'get_in_touch.v.01',
 'consort.v.01',
 'communicate.v.02',
 '*root*',
 'get_around_to.v.01',
 'go.v.02',
 'condescend.v.02']

In [11]:
# Query 'patronize.v.03' in the small-wordnet graph with the dim-10 embedding.
build(
    fedge='data/edges/small-wordnet.edges',
    fmap='data/mappings/small-wordnet.mapping',
    femb='data/emb/smlwnr10.emb',
    dim=10,
    key='patronize.v.03',
)

tau=2.1994093228131533


Relations for patronize.v.03 in embedding using distance metric
interact.v.01 0.9999999965586915
stoop_to.v.01 1.0000000108099445
transact.v.01 1.4679328049012716
intervene.v.01 1.5219365402686673
communicate.v.02 1.5255099305988278
treat.v.01 1.556193164095037
socialize.v.01 1.6702956975008312
consort.v.01 1.7209652533198887
get_in_touch.v.01 1.8174770372076614
*root* 1.8402061311961282
get_around_to.v.01 1.8828823323648947
meet.v.07 1.903019503817008
gloss_over.v.01 1.9032203343027059


Children of patronize.v.03 in graph
	stoop_to.v.01


['interact.v.01',
 'stoop_to.v.01',
 'transact.v.01',
 'intervene.v.01',
 'communicate.v.02',
 'treat.v.01',
 'socialize.v.01',
 'consort.v.01',
 'get_in_touch.v.01',
 '*root*',
 'get_around_to.v.01',
 'meet.v.07',
 'gloss_over.v.01']

In [127]:
# Query 'apple.n.01' in the apple-senses graph with the dim-10 embedding.
targets = build(
    fedge='data/edges/apple-senses.edges',
    fmap='data/mappings/apple-senses.mapping',
    femb='data/emb/as10.emb',
    dim=10,
    key='apple.n.01',
)


tau=4.3988186456263065


Relations for apple.n.01 in embedding using distance metric
fruit.n.01 1.0003753562559496
natural_object.n.01 2.000377853723166
physical_entity.n.01 3.0003779119412344
living_thing.n.01 3.8234675779643426
entity.n.01 3.831769813611903
abstraction.n.06 4.772116442658565
tree.n.01 4.823418464545469
social_group.n.01 5.603823020779371
artifact.n.01 5.714554762602499
apple.n.02 5.823136354158206
company.n.01 6.603780823741919
machine.n.01 6.714532590984551
apple.n.03 7.604546414482919


Children of apple.n.01 in graph


In [128]:
# Query 'fruit.n.01' in the apple-senses graph with the dim-10 embedding.
targets = build(
    fedge='data/edges/apple-senses.edges',
    fmap='data/mappings/apple-senses.mapping',
    femb='data/emb/as10.emb',
    dim=10,
    key='fruit.n.01',
)


tau=4.3988186456263065


Relations for fruit.n.01 in embedding using distance metric
natural_object.n.01 1.0000024974753996
apple.n.01 1.0003753562559496
physical_entity.n.01 2.000002555693464
living_thing.n.01 2.823092227928694
entity.n.01 2.831394463233553
abstraction.n.06 3.771741092306799
tree.n.01 3.8230431145138755
social_group.n.01 4.603447670432201
artifact.n.01 4.714179412250374
apple.n.02 4.822761004126613
company.n.01 5.6034054733947505
machine.n.01 5.714157240632425
apple.n.03 6.60417106413575


Children of fruit.n.01 in graph
	apple.n.01


In [131]:
# Query 'apple.n.01' again, then print the parsed edge dictionary for inspection.
targets = build(
    fedge='data/edges/apple-senses.edges',
    fmap='data/mappings/apple-senses.mapping',
    femb='data/emb/as10.emb',
    dim=10,
    key='apple.n.01',
)
edges = BuildEdgeDict(fname='data/edges/apple-senses.edges')
print(edges)

tau=4.3988186456263065


Relations for apple.n.01 in embedding using distance metric
fruit.n.01 1.0003753562559496
natural_object.n.01 2.000377853723166
physical_entity.n.01 3.0003779119412344
living_thing.n.01 3.8234675779643426
entity.n.01 3.831769813611903
abstraction.n.06 4.772116442658565
tree.n.01 4.823418464545469
social_group.n.01 5.603823020779371
artifact.n.01 5.714554762602499
apple.n.02 5.823136354158206
company.n.01 6.603780823741919
machine.n.01 6.714532590984551
apple.n.03 7.604546414482919


Children of apple.n.01 in graph
defaultdict(<class 'list'>, {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [7], 4: [8], 5: [9], 6: [10], 7: [11], 8: [12], 9: [13], 10: [14]})


In [10]:
# Query 'apple.n.02' in the apple-senses graph with the dim-10 embedding.
targets = build(
    fedge='data/edges/apple-senses.edges',
    fmap='data/mappings/apple-senses.mapping',
    femb='data/emb/as10.emb',
    dim=10,
    key='apple.n.02',
)

tau=4.3988186456263065


Relations for apple.n.02 in embedding using distance metric
tree.n.01 0.9997178951984218
living_thing.n.01 1.9997092313129867
physical_entity.n.01 2.999709306937243
natural_object.n.01 3.82279895610494
entity.n.01 3.9404916876831533
fruit.n.01 4.822761004126613
abstraction.n.06 4.879597805855609
social_group.n.01 5.71140583888185
artifact.n.01 5.8220562519409915
apple.n.01 5.823136354158206
company.n.01 6.711363673980225
machine.n.01 6.822034084239871
apple.n.03 7.7121292647261575


Children of apple.n.02 in graph
