ref:
https://github.com/RaRe-Technologies/gensim/blob/920c029ca97f961c8df264672c34936607876694/docs/notebooks/Poincare%20Tutorial.ipynb

In [1]:
import os
import logging
from tqdm import tqdm_notebook as tqdm
import numpy as np
from nltk.corpus import wordnet as wn
from gensim.models.poincare import PoincareModel, PoincareKeyedVectors, PoincareRelations
logging.basicConfig(level=logging.INFO)

In [2]:
PoincareModel(train_data=[('node.1', 'node.2'), ('node.2', 'node.3')])

INFO:gensim.models.poincare:Loading relations from train data..
INFO:gensim.models.poincare:Loaded 2 relations from train data, 3 nodes


<gensim.models.poincare.PoincareModel at 0x117f5f7b8>

In [3]:
words = wn.words()

In [4]:
taxi_synets = wn.synsets("dog", pos="n")
taxi_synets

[Synset('dog.n.01'),
 Synset('frump.n.01'),
 Synset('dog.n.03'),
 Synset('cad.n.01'),
 Synset('frank.n.02'),
 Synset('pawl.n.01'),
 Synset('andiron.n.01')]

In [5]:
taxi_synets[0].hypernyms()

[Synset('canine.n.02'), Synset('domestic_animal.n.01')]

In [6]:
taxi_synets[0].hypernym_paths()

[[Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('living_thing.n.01'),
  Synset('organism.n.01'),
  Synset('animal.n.01'),
  Synset('chordate.n.01'),
  Synset('vertebrate.n.01'),
  Synset('mammal.n.01'),
  Synset('placental.n.01'),
  Synset('carnivore.n.01'),
  Synset('canine.n.02'),
  Synset('dog.n.01')],
 [Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('living_thing.n.01'),
  Synset('organism.n.01'),
  Synset('animal.n.01'),
  Synset('domestic_animal.n.01'),
  Synset('dog.n.01')]]

In [7]:
nouns = set([])
for word in tqdm(words):
    nouns.update(wn.synsets(word, pos='n'))




In [8]:
len(nouns)

82115

In [9]:
hypernyms = set([])
for noun in nouns:
    for path in noun.hypernym_paths():
        hypernyms.update( (noun, hypernym) for hypernym in path[1:] if hypernym.pos() == 'n')
with open("hypernyms.tsv", 'w') as f:
        for n1, n2 in hypernyms:
            print(n1.name(), n2.name(), sep="\t", file=f)

In [10]:
mammals = []
mammal_tree = []
target = wn.synset("mammal.n.01")
for noun in nouns:
    for path in noun.hypernym_paths():
        if target in path:
            pos = path.index(target)
            for i in range(pos, len(path)-1):
                mammals.append((noun, path[i]))
                mammal_tree.append((noun.name(), path[i].name()))
with open("mammals.tsv", 'w') as f:
        for n1, n2 in mammals:
            print(n1.name(), n2.name(), sep="\t", file=f)

In [11]:
len(mammals)

6575

In [12]:
relations = PoincareRelations(file_path="mammals.tsv", delimiter='\t')

In [13]:
model = PoincareModel(train_data=relations, size=2, burn_in=0)
model.train(epochs=300, print_every=1000)

INFO:gensim.models.poincare:Loading relations from train data..
INFO:gensim.models.poincare:Loaded 6575 relations from train data, 1182 nodes
INFO:gensim.models.poincare:training model of size 2 with 1 workers on 6575 relations for 300 epochs and 0 burn-in epochs, using lr=0.10000 burn-in lr=0.01000 negative=10
INFO:gensim.models.poincare:Starting training (300 epochs)----------------------------------------
INFO:gensim.models.poincare:Training finished


In [14]:
model.save('./mammal_test_model')
model=PoincareModel.load('./mammal_test_model')
model.kv.save_word2vec_format('./mammal_test_vectors')
PoincareKeyedVectors.load_word2vec_format('./mammal_test_vectors')

INFO:gensim.utils:saving PoincareModel object under ./mammal_test_model, separately None
INFO:gensim.utils:saved ./mammal_test_model
INFO:gensim.utils:loading PoincareModel object from ./mammal_test_model
INFO:gensim.utils:loading kv recursively from ./mammal_test_model.kv.* with mmap=None
INFO:gensim.utils:loaded ./mammal_test_model
INFO:gensim.models.keyedvectors:storing 1182x2 projection weights into ./mammal_test_vectors
INFO:gensim.models.keyedvectors:loading projection weights from ./mammal_test_vectors
INFO:gensim.models.keyedvectors:loaded (1182, 2) matrix from ./mammal_test_vectors


<gensim.models.poincare.PoincareKeyedVectors at 0x13384af60>

In [15]:
model.kv.distance('dog.n.01', 'cat.n.01')

9.5131588010697978

In [26]:
model.kv.word_vec("dog.n.01")

array([ 0.47632694, -0.79934357])

In [32]:
model.kv.distance('dog.n.01', 'mammal.n.01')

3.4071299790334635

In [34]:
model.kv.descendants('mammal.n.01')

['carnivore.n.01', 'canine.n.02', 'dog.n.01', 'terrier.n.01', 'corgi.n.01']

In [35]:
model.kv.ancestors('dog.n.01')

['canine.n.02', 'carnivore.n.01', 'placental.n.01']

In [37]:
print(model.kv.norm('dog.n.01'))
print(model.kv.norm('mammal.n.01'))

0.930503899482
0.0535688026934


In [67]:
#model.kv.most_similar('electricity.n.01')

In [19]:
model.kv.most_similar('dog.n.01')

[('canine.n.02', 1.1928666743540166),
 ('carnivore.n.01', 2.111801331798476),
 ('terrier.n.01', 2.610616537051652),
 ('placental.n.01', 3.361364472939095),
 ('mammal.n.01', 3.4071299790334635),
 ('hunting_dog.n.01', 3.633245243348729),
 ('corgi.n.01', 4.076441755109278),
 ('ungulate.n.01', 4.742642172560402),
 ('hound.n.01', 5.142652873001465),
 ('poodle.n.01', 5.484196356419966)]

In [20]:
import pickle
from plotly.offline import init_notebook_mode, iplot
from gensim.viz.poincare import poincare_2d_visualization, poincare_distance_heatmap

init_notebook_mode(connected=True)

show_node_labels = [
    'mammal.n.01', 'placental.n.01', 'ungulate.n.01', 'carnivore.n.01', 'rodent.n.01',
    'canine.n.02', 'even-toed_ungulate.n.01', 'odd-toed_ungulate.n.01', 'elephant.n.01',
    'rhinoceros.n.01', 'german_shepherd.n.01', 'feline.n.01', 'tiger.n.02', 'homo_sapiens.n.01']

In [27]:
figure_title = """
<b>2-D Visualization of model trained on mammals subtree</b><br>
"""
iplot(poincare_2d_visualization(model, mammal_tree, figure_title, show_node_labels=show_node_labels))

In [29]:
iplot(poincare_distance_heatmap([0.0, 0.0]))

AttributeError: type object 'PoincareKeyedVectors' has no attribute 'poincare_dists'